php中curl应用(多线程)
2011-03-21 18:31:46| 分类:
php
| 标签:
|字号
一般来说分为以下几个步骤:
1.调用curl_multi_init
2.对地址循环调用
3.循环内部curl_setopt_array();
4.循环内部增加curl_multi_add_handle();
5.持续调用curl_multi_exec();
6.取得结果 curl_multi_getcontent();
7.循环内部curl_multi_remove_handle()
8.循环curl_close();
9.curl_multi_close() 返回结果
如果不明白,请看下面一个例子
/**
 * Fetch several URLs concurrently through the curl_multi interface.
 *
 * One easy handle is created per entry of $urlarr and all of them are driven
 * by a single multi handle. Bodies of transfers that completed without error
 * are returned under the same key the URL had in $urlarr; failed transfers
 * are reported through error_log() and left out of the result.
 *
 * @param array $urlarr Map of key => URL to download.
 * @return array Map of key => response body for each successful transfer.
 */
function curl_multi_fetch($urlarr=array()){
    $timeout = 30; // per-transfer timeout in seconds
    $ch = array();
    $mh = curl_multi_init();

    foreach ($urlarr as $key => $url) {
        $ch[$key] = curl_init();
        curl_setopt_array($ch[$key], array(
            CURLOPT_URL            => $url,
            CURLOPT_HEADER         => false,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT        => $timeout,
        ));
        curl_multi_add_handle($mh, $ch[$key]);
    }

    // Drive all transfers until none is running or the multi stack fails.
    $running = 0;
    do {
        $mrc = curl_multi_exec($mh, $running);
        if ($mrc === CURLM_CALL_MULTI_PERFORM) {
            // Old libcurl versions ask to be called again immediately.
            continue;
        }
        if ($running && $mrc === CURLM_OK && curl_multi_select($mh, 1.0) === -1) {
            // select() can fail without blocking; pause briefly so the loop
            // keeps calling curl_multi_exec() without spinning the CPU.
            usleep(1000);
        }
    } while ($mrc === CURLM_CALL_MULTI_PERFORM || ($running && $mrc === CURLM_OK));

    // Collect the per-transfer result codes reported by the multi handle.
    $codes = array();
    while (($info = curl_multi_info_read($mh)) !== false) {
        if ($info['msg'] !== CURLMSG_DONE) {
            continue;
        }
        $doneKey = array_search($info['handle'], $ch, true);
        if ($doneKey !== false) {
            $codes[$doneKey] = $info['result'];
        }
    }

    // Gather the bodies of successful transfers and release every handle.
    $result = array();
    foreach ($urlarr as $key => $url) {
        $err  = curl_error($ch[$key]);
        $code = isset($codes[$key]) ? $codes[$key] : CURLE_OK;
        if ($err === '' && $code === CURLE_OK) {
            $result[$key] = curl_multi_getcontent($ch[$key]);
        } else {
            error_log("curl error for {$url}: [{$code}] {$err}");
        }
        curl_multi_remove_handle($mh, $ch[$key]);
        curl_close($ch[$key]);
    }

    curl_multi_close($mh);
    return $result;
}
/**
 * Return the current Unix timestamp as a float with sub-second precision.
 *
 * @return float Seconds since the Unix epoch, including the fractional part.
 */
function microtime_float(){
    // microtime() without arguments yields the string "<fraction> <seconds>".
    $parts = explode(" ", microtime());
    $fraction = floatval($parts[0]);
    $seconds = floatval($parts[1]);
    return ($fraction + $seconds);
}
// Demo: download three sites concurrently and report the elapsed time.
$url_arr=array(
    "taobao"=>"http://www.taobao.com",
    "sohu"=>"http://www.sohu.com",
    "sina"=>"http://www.sina.com.cn",
);
// The leftover debugging calls phpinfo(); exit; were removed here: they ended
// the script before the benchmark below could ever run.
$timestart=microtime_float();
$data=curl_multi_fetch($url_arr);
$timeend=microtime_float();
$totaltime=$timeend-$timestart;
echo "耗时:{$totaltime}";
var_dump($data);