/**
 * Fetches a batch of URLs concurrently through one curl_multi handle.
 *
 * Every distinct URL passed to the constructor gets its own curl easy handle;
 * batchedGetHtml() runs all transfers and returns the bodies keyed by URL.
 */
class BatchedCurlUtil
{
    /** @var mixed curl_multi handle, or null when the constructor got a non-array. */
    private $_mh = null;
    /** @var int Per-request timeout in seconds (passed to CURLOPT_TIMEOUT). */
    private $_timeout = 30;
    /** @var array Map of URL => curl easy handle. */
    private $_handleArr = array();
    /** @var array Map of URL => CURLE_* result code for transfers that have completed. */
    private $_resultArr = array();

    /**
     * Initializes curl_multi and registers one easy handle per distinct URL.
     *
     * A non-array argument leaves the instance empty (a constructor cannot
     * signal failure through a return value); batchedGetHtml() then returns
     * an empty array.
     *
     * @param array $url_arr List of URLs to fetch. Non-string entries and
     *                       repeated URLs are skipped.
     */
    public function __construct($url_arr)
    {
        if (!is_array($url_arr)) {
            return;
        }

        $this->_mh = curl_multi_init();

        foreach ($url_arr as $url) {
            // Handles are keyed by URL, so a repeated URL must not get a second
            // handle: it would still be downloaded but its result would be lost.
            if (!is_string($url) || isset($this->_handleArr[$url])) {
                continue;
            }

            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_HTTPHEADER, array('Accept-Language:zh-CN,zh;q=0.8'));
            // Response headers are not wanted in the returned body.
            curl_setopt($ch, CURLOPT_HEADER, 0);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_TIMEOUT, $this->_timeout);
            // Follow "Location:" redirects, at most CURLOPT_MAXREDIRS times.
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
            curl_setopt($ch, CURLOPT_MAXREDIRS, 5);

            curl_multi_add_handle($this->_mh, $ch);
            $this->_handleArr[$url] = $ch;
        }
    }

    /**
     * Detaches every easy handle and closes the curl_multi handle.
     */
    public function __destruct()
    {
        if ($this->_mh === null) {
            return;
        }

        foreach ($this->_handleArr as $ch) {
            curl_multi_remove_handle($this->_mh, $ch);
        }
        $this->_handleArr = array();

        curl_multi_close($this->_mh);
        $this->_mh = null;
    }

    /**
     * Runs all registered transfers and collects their response bodies.
     *
     * @return array Map of URL => response body string, or false for a URL
     *               whose transfer failed or did not complete.
     */
    public function batchedGetHtml()
    {
        $dataArr = array();
        if ($this->_mh === null || count($this->_handleArr) === 0) {
            return $dataArr;
        }

        $running = 0;
        do {
            // Old libcurl versions ask to be called again immediately.
            do {
                $status = curl_multi_exec($this->_mh, $running);
            } while ($status === CURLM_CALL_MULTI_PERFORM);

            if ($status !== CURLM_OK) {
                break;
            }

            // Block until there is socket activity instead of spinning the CPU.
            // A return of -1 means select could not wait; back off briefly.
            if ($running > 0 && curl_multi_select($this->_mh, 1.0) === -1) {
                usleep(1000);
            }
        } while ($running > 0);

        // Per-transfer result codes are only available through
        // curl_multi_info_read(); remember them so repeated calls stay correct.
        while (($info = curl_multi_info_read($this->_mh)) !== false) {
            if ($info['msg'] !== CURLMSG_DONE) {
                continue;
            }
            $key = array_search($info['handle'], $this->_handleArr, true);
            if ($key !== false) {
                $this->_resultArr[$key] = $info['result'];
            }
        }

        foreach ($this->_handleArr as $key => $ch) {
            $succeeded = isset($this->_resultArr[$key]) && $this->_resultArr[$key] === CURLE_OK;
            $dataArr[$key] = $succeeded ? curl_multi_getcontent($ch) : false;
        }

        return $dataArr;
    }
}
// Sample input for BatchedCurlUtil: three distinct image URLs followed by
// one URL repeated three times, to show how duplicate entries are handled.
$url = array();
$url[] = 'http://img01.taobaocdn.com/bao/uploaded/i1/19297024898783520/T1fKGoXt8iXXXXXXXX_!!0-item_pic.jpg';
$url[] = 'http://img01.taobaocdn.com/bao/uploaded/i1/1040439297/T2GfL1XdpXXXXXXXXX_!!1040439297.jpg';
$url[] = 'http://img01.taobaocdn.com/bao/uploaded/i1/15291036636308449/T1b6uHXyFcXXXXXXXX_!!0-item_pic.jpg';
// The remaining three entries are intentionally identical.
$url[] = 'http://img03.taobaocdn.com/bao/uploaded/i3/272715291/T2uUcmXhlXXXXXXXXX_!!272715291.jpg';
$url[] = 'http://img03.taobaocdn.com/bao/uploaded/i3/272715291/T2uUcmXhlXXXXXXXXX_!!272715291.jpg';
$url[] = 'http://img03.taobaocdn.com/bao/uploaded/i3/272715291/T2uUcmXhlXXXXXXXXX_!!272715291.jpg';
以上 $url 数组中的后三个地址是重复的。
下面是 print_r($this->_handleArr); 的输出结果。由于句柄数组以URL作为键,重复的URL只会保留一项:
Array
(
    [http://img01.taobaocdn.com/bao/uploaded/i1/19297024898783520/T1fKGoXt8iXXXXXXXX_!!0-item_pic.jpg] => Resource id #3
    [http://img01.taobaocdn.com/bao/uploaded/i1/1040439297/T2GfL1XdpXXXXXXXXX_!!1040439297.jpg] => Resource id #4
    [http://img01.taobaocdn.com/bao/uploaded/i1/15291036636308449/T1b6uHXyFcXXXXXXXX_!!0-item_pic.jpg] => Resource id #5
    [http://img03.taobaocdn.com/bao/uploaded/i3/272715291/T2uUcmXhlXXXXXXXXX_!!272715291.jpg] => Resource id #8
)
因此返回结果里同一张图片不会重复出现(当然也可以在程序里先判断去重,例如使用 array_unique())。需要注意的是,如果要批量替换某段文本里的图片地址,原地址和新地址必须一一对应,所以要先去除重复的地址。
可以用 preg_replace() 或者数组形式的 str_replace() 进行替换。注意 str_replace() 是按数组顺序依次替换的,前面替换出来的内容可能会被后面的规则再次替换。