使用curl模拟不同ip多线程采集函数
/**
 * Extract the text located between two marker strings.
 *
 * Looks for the first occurrence of $findStart in $str and returns what
 * follows it, up to (but not including) the first occurrence of $findEnd
 * found after the start marker. When $findEnd is false, everything after
 * the start marker is returned.
 *
 * $findStart and $findEnd may also both be arrays of the same length. The
 * marker pairs are then tried in order (paired by key) and the first pair
 * that matches wins.
 *
 * @param string          $str       Text to search. Taken by reference but never modified here.
 * @param string|array    $findStart Start marker, or a list of start markers.
 * @param string|array|false $findEnd End marker, a list of end markers, or false for "to end of string".
 * @param string          $encoding  Character encoding passed to the mb_* functions.
 *
 * @return string|false The extracted text, or false when a marker is not found
 *                      or the marker arguments do not line up.
 */
function strCutByStr(&$str, $findStart, $findEnd = false, $encoding = 'utf-8')
{
    if (is_array($findStart)) {
        // A list of start markers needs a matching list of end markers.
        // Checking is_array() first matters: count() on a non-array (such as
        // the default false) raises a TypeError on PHP 8.
        if (!is_array($findEnd) || count($findStart) !== count($findEnd)) {
            return false;
        }

        foreach ($findStart as $key => $startMarker) {
            $result = strCutByStr($str, $startMarker, $findEnd[$key], $encoding);
            if ($result !== false) {
                return $result;
            }
        }

        return false;
    }

    $start = mb_strpos($str, $findStart, 0, $encoding);
    if ($start === false) {
        return false;
    }

    // Move past the start marker so it is not part of the result.
    $start += mb_strlen($findStart, $encoding);

    if ($findEnd === false) {
        return mb_substr($str, $start, null, $encoding);
    }

    $end = mb_strpos($str, $findEnd, $start, $encoding);
    if ($end === false) {
        return false;
    }

    return mb_substr($str, $start, $end - $start, $encoding);
}
/**
 * Create a configured cURL easy handle for a single URL.
 *
 * Declared at file scope (rather than inside curl_multi()) so that it is
 * defined exactly once; a named function declared inside another function
 * is re-declared on every call of the outer function, which is a fatal error
 * the second time.
 *
 * @param string $url URL to fetch; also sent as the Referer.
 *
 * @return resource|CurlHandle The prepared handle (not yet executed).
 */
function createCh($url)
{
    $ch = curl_init();

    // Random address that is only placed in request headers; the actual
    // connection still originates from this machine's real address.
    $ip = rand(10, 30) . "." . rand(10, 50) . "." . rand(1, 253) . "." . rand(1, 250);
    $headerArr = array(
        'CLIENT-IP:' . $ip,
        'X-FORWARDED-FOR:' . $ip,
    );

    curl_setopt($ch, CURLOPT_HTTPHEADER, $headerArr);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0"); // User-Agent header
    curl_setopt($ch, CURLOPT_REFERER, $url);            // Referer header
    curl_setopt($ch, CURLOPT_ENCODING, "gzip");         // accept gzip-compressed responses
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);        // follow 301/302 redirects
    curl_setopt($ch, CURLOPT_MAXREDIRS, 5);             // cap the redirect chain
    // SECURITY: certificate and host verification are switched off, so HTTPS
    // peers are not authenticated. Kept as in the original behavior.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE);
    curl_setopt($ch, CURLOPT_TIMEOUT, 20);

    return $ch;
}

/**
 * Fetch several URLs concurrently and collect each page's <title> text.
 *
 * Every URL gets its own easy handle (see createCh()), all handles are driven
 * through one cURL multi handle, and for each handle whose curl_error() is
 * empty the text between '<title>' and '</title>' is extracted with
 * strCutByStr(). One line per extracted title is appended to $outputFile.
 *
 * @param array  $urls       URLs to fetch; keys are preserved in the result.
 * @param string $outputFile File the titles are appended to.
 *
 * @return array|false Map of original key => title string (or false when no
 *                     title was found), or false when $urls is not a
 *                     non-empty array.
 */
function curl_multi($urls, $outputFile = "0501.txt")
{
    if (!is_array($urls) || count($urls) === 0) {
        return false;
    }

    $curl = array();
    $text = array();
    $handle = curl_multi_init();

    foreach ($urls as $k => $url) {
        $curl[$k] = createCh($url);
        curl_multi_add_handle($handle, $curl[$k]);
    }

    // Drive all transfers until none is active or the multi handle errors.
    $active = null;
    do {
        do {
            $mrc = curl_multi_exec($handle, $active);
        } while ($mrc == CURLM_CALL_MULTI_PERFORM);

        // curl_multi_select() blocks until there is activity. It returns -1
        // when it could not wait; only then do we sleep briefly ourselves so
        // the loop does not spin at full CPU.
        if ($active && $mrc == CURLM_OK && curl_multi_select($handle) == -1) {
            usleep(100);
        }
    } while ($active && $mrc == CURLM_OK);

    $lines = '';
    foreach ($curl as $k => $ch) {
        // NOTE(review): with the multi interface, per-transfer failures are
        // normally reported via curl_multi_info_read(); curl_error() on the
        // easy handle may stay empty for a failed transfer — confirm.
        if (curl_error($ch) == "") {
            $body = (string) curl_multi_getcontent($ch);
            $text[$k] = strCutByStr($body, '<title>', '</title>');
            $lines .= " $text[$k] \r\n";
        }
        curl_multi_remove_handle($handle, $ch);
        curl_close($ch);
    }
    curl_multi_close($handle);

    // Single append for the whole batch; a write failure only emits a
    // warning and does not prevent the results from being returned.
    if ($lines !== '') {
        file_put_contents($outputFile, $lines, FILE_APPEND);
    }

    return $text;
}