要在 PHP 中使用多线程,首先需要 PHP 5.3 以上版本(必须是启用了线程安全 ZTS 的构建),并安装 pthreads 扩展,这样 PHP 才能真正地支持多线程。扩展的安装方法请自行百度。
PHP扩展下载:https://github.com/krakjoe/pthreads
PHP手册文档:http://php.net/manual/zh/book.pthreads.php
安装好扩展之后,就可以使用多线程了。下面贴一段通过百度搜索结果抓取百度网盘分享信息的代码:
- <?php
- include 'include/CurlLoad.class.php';
-
-
-
-
-
/**
 * Fetch a search-result page and collect share details for every link on it.
 *
 * Each result link is handed to its own baidu_thread_run worker, so the
 * detail pages are processed in parallel rather than one after another.
 *
 * @param string $url Search-result page URL, passed on to BaiduSRLinksGet().
 *
 * @return array|null Null when BaiduSRLinksGet() yields nothing usable (no
 *                    result, or a result without a 'links' key). Otherwise an
 *                    array holding 'pages' (the pagination map reported by
 *                    BaiduSRLinksGet(), or null when it reported none),
 *                    'status' (the string "1") and, only when at least one
 *                    worker produced data, 'res' (list of worker results).
 */
function vget($url)
{
    $ret = BaiduSRLinksGet($url, 1);
    if (!is_array($ret) || !array_key_exists('links', $ret)) {
        return null;
    }

    // Start one worker per link. The list is initialised up front so that an
    // empty link set no longer iterates over an undefined variable.
    $threads = array();
    foreach ($ret['links'] as $link) {
        $thread = new baidu_thread_run($link);
        if ($thread->start()) {
            $threads[] = $thread;
        }
    }

    $infos = array();
    foreach ($threads as $thread) {
        // join() itself blocks until the worker has finished, so the former
        // isRunning()/usleep() polling loop was only burning CPU.
        if ($thread->join()) {
            $data = $thread->data;
            if ($data != null) {
                $infos['res'][] = $data;
            }
        }
    }

    // 'pages' is only present when pagination links were found.
    $infos['pages'] = isset($ret['pages']) ? $ret['pages'] : null;
    $infos['status'] = "1";
    return $infos;
}
-
-
-
-
-
-
-
-
-
/**
 * Extract result links and pagination links from a search-result page.
 *
 * @param string $url    Search-result page URL, downloaded via CurlLoad::HtmlGet().
 * @param int    $format When 1, every link is probed with CurlLoad::Get_Headers()
 *                       and replaced by its 'Location' header if one is present;
 *                       any other value returns the links exactly as captured.
 *
 * @return array|null Null when the page is empty, the link pattern fails, or an
 *                    exception is caught. Otherwise an array with:
 *                    - 'links': list of links (absent when $format is 1 and no
 *                      link was captured);
 *                    - 'pages': map of (pn / 10) => base64-encoded captured
 *                      query fragment (absent when no pagination link was
 *                      processed).
 */
function BaiduSRLinksGet($url, $format = 0)
{
    $html = CurlLoad::HtmlGet($url);
    if ($html == null) {
        return null;
    }
    try {
        // Single-quoted so the double quotes stay literal (the double-quoted
        // form was a parse error). Lazy quantifier so two links on the same
        // line are captured separately instead of being merged into one.
        $matched = preg_match_all('/"url":"(?<links>.*?)"}/', $html, $rets);
        if ($matched === false || !isset($rets['links'])) {
            return null;
        }

        $ret = array();
        if ($format == 1) {
            foreach ($rets['links'] as $i => $link) {
                $headers = CurlLoad::Get_Headers($link, 1);
                // Fall back to the captured link for THIS index only; the old
                // code overwrote the whole list here and threw away every
                // redirect target resolved so far.
                if (is_array($headers) && array_key_exists('Location', $headers)) {
                    $ret['links'][$i] = $headers['Location'];
                } else {
                    $ret['links'][$i] = $link;
                }
            }
        } else {
            $ret['links'] = $rets['links'];
        }

        // '#' delimiters because the pattern itself contains '/'; the literal
        // '?' and '.' are escaped so they are not read as regex operators.
        preg_match_all(
            '#href="?/s\?wd=site%3Apan\.baidu\.com%20(?<url>.+?)&ie=utf-8">#',
            $html,
            $out
        );
        $pageUrls = isset($out['url']) ? $out['url'] : array();
        unset($pageUrls[0]);
        // NOTE(review): the count is taken after dropping the first entry, so
        // the last captured link is never visited. Kept as in the original
        // because it is unclear whether that is intentional - confirm.
        $number = count($pageUrls);
        for ($i = 1; $i < $number; $i++) {
            // Only the digits after "&pn=" are used; a missing value counts as 0.
            $pn = preg_match('/&pn=(\d+)/', $pageUrls[$i], $m) ? (int) $m[1] : 0;
            $ret['pages'][(int) ($pn / 10)] = base64_encode($pageUrls[$i]);
        }
        return $ret;
    } catch (Exception $e) {
        WriteLog($e);
        return null;
    }
}
-
-
-
-
-
-
-
/**
 * Download a share page and pull the file details out of its text.
 *
 * @param string $url Share page URL, downloaded via CurlLoad::HtmlGet().
 *
 * @return array|null Null when the page is empty, the details pattern does not
 *                    match, or an exception is caught. Otherwise an array with
 *                    the keys 'name', 'size', 'user', 'date', 'number' (taken
 *                    from the first match) and 'link' (the given $url).
 */
function PanInfoGet($url)
{
    $page = CurlLoad::HtmlGet($url);
    if ($page == null) {
        return null;
    }

    try {
        $pattern = "/文件名:(?<name>.*) 文件大小:(?<size>.*) 分享者:(?<user>.*) 分享时间:(?<date>.*) 下载次数:(?<number>[0-9]+)/";
        $found = preg_match_all($pattern, $page, $groups);
        if ($found == 0) {
            return null;
        }

        // Copy the first match of every named group, in a fixed key order.
        $info = array();
        foreach (array('name', 'size', 'user', 'date', 'number') as $field) {
            $info[$field] = $groups[$field][0];
        }
        $info['link'] = $url;

        return $info;
    } catch (Exception $e) {
        WriteLog($e);
        return null;
    }
}
/**
 * Append a timestamped warning line to ../error.log (resolved against the
 * current working directory).
 *
 * @param mixed $str Message to record. It is string-concatenated, so an
 *                   Exception is written through its string conversion.
 *
 * @return void
 */
function WriteLog($str)
{
    // "\r\n" restores the line terminator; the literal "rn" that stood here
    // glued every entry onto a single line.
    $line = "Warning:" . date("Y/m/d H:i:s") . ":" . $str . "\r\n";

    // FILE_APPEND keeps the previous append behaviour. LOCK_EX stops entries
    // from interleaving, since this logger is reachable from worker threads.
    if (file_put_contents("../error.log", $line, FILE_APPEND | LOCK_EX) === false) {
        // The log file could not be written; hand the entry to PHP's own
        // error log rather than losing it.
        error_log($line);
    }
}
-
-
-
-
-
/**
 * Worker thread that loads the share details for a single URL.
 *
 * After the thread has run, $data holds whatever PanInfoGet() returned for
 * $url; it stays at its default when $url is empty.
 */
class baidu_thread_run extends Thread
{
    /** URL this worker is responsible for. */
    public $url;

    /** Result of PanInfoGet() for $url, filled in by run(). */
    public $data;

    /**
     * @param string $url URL to process when the thread runs.
     */
    public function __construct($url)
    {
        $this->url = $url;
    }

    /**
     * Thread body: look up the share details unless the URL is empty.
     *
     * @return void
     */
    public function run()
    {
        $target = $this->url;
        if (!$target) {
            return;
        }

        $this->data = PanInfoGet($target);
    }
}
- ?>
|