<?php
/**
 * Web-scraping helper: fetches page content over HTTP using cURL.
 */
class Reptile{
    /**
     * Fetch a single URL and return its body (one blocking request).
     *
     * The request advertises gzip support; the body is gunzipped only when it
     * really is gzip data, so servers that answer uncompressed still work.
     *
     * @param string $url   Address to fetch.
     * @param int    $proxy Pass 1 to route the request through the local proxy
     *                      at 127.0.0.1:9090; any other value connects directly.
     * @return string|false Response body, or false when the transfer failed or
     *                      the gzip payload could not be decoded.
     */
    public function getContent($url, $proxy = 0){
        $ch = $this->createHandle($url, $proxy);
        $contents = curl_exec($ch);
        curl_close($ch);

        return $this->decodeBody($contents);
    }

    /**
     * Fetch several URLs concurrently through one cURL multi handle.
     *
     * @param array $urlArr Map of key => URL; the keys are preserved in the result.
     * @param int   $proxy  Pass 1 to route every request through the local proxy
     *                      at 127.0.0.1:9090.
     * @return array Map of key => body converted to UTF-8 (source encoding is
     *               detected among UTF-8, GBK, GB2312 and BIG5). A transfer that
     *               failed yields an empty string for its key.
     */
    public function getContentByMulti($urlArr, $proxy = 0){
        $mh = curl_multi_init();
        $conn = array();
        foreach ($urlArr as $i => $url) {
            $conn[$i] = $this->createHandle($url, $proxy);
            curl_multi_add_handle($mh, $conn[$i]);
        }

        // Drive all transfers. curl_multi_select() blocks until there is socket
        // activity, so the loop no longer spins at 100% CPU while waiting.
        $active = 0;
        do {
            $status = curl_multi_exec($mh, $active);
            if ($active && $status === CURLM_OK && curl_multi_select($mh, 0.5) === -1) {
                // select() may fail immediately when libcurl has no descriptors
                // to wait on yet; back off briefly instead of busy-looping.
                usleep(1000);
            }
        } while ($active && $status === CURLM_OK);

        $res = array();
        foreach ($conn as $i => $handle) {
            $body = $this->decodeBody(curl_multi_getcontent($handle));
            $res[$i] = ($body === false)
                ? ''
                : mb_convert_encoding($body, 'UTF-8', 'UTF-8,GBK,GB2312,BIG5');
            // Detach and release every easy handle, then the multi handle,
            // so nothing leaks across repeated calls.
            curl_multi_remove_handle($mh, $handle);
            curl_close($handle);
        }
        curl_multi_close($mh);

        return $res;
    }

    /**
     * Build a cURL easy handle with the options shared by both fetch methods.
     *
     * @param string $url   Address the handle will request.
     * @param int    $proxy 1 enables the local proxy at 127.0.0.1:9090.
     * @return resource|\CurlHandle Configured handle, not yet executed.
     */
    private function createHandle($url, $proxy){
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        // Return the body from curl_exec()/curl_multi_getcontent() instead of echoing it.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
        curl_setopt($ch, CURLOPT_HTTPHEADER, array('Accept-Encoding:gzip'));
        // Loose comparison kept on purpose so callers passing '1' or true still enable the proxy.
        if ($proxy == 1) {
            curl_setopt($ch, CURLOPT_PROXY, '127.0.0.1:9090');
        }
        // NOTE(review): the leading ':' in the User-Agent looks like a copy/paste
        // slip; kept byte-for-byte because target sites may react to a change.
        curl_setopt($ch, CURLOPT_USERAGENT, ':Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36');
        // For pages behind HTTP authentication, additionally set
        // CURLOPT_HTTPAUTH (CURLAUTH_ANY) and CURLOPT_USERPWD on the handle.

        return $ch;
    }

    /**
     * Gunzip a response body only when it actually is gzip data.
     *
     * @param mixed $raw Value returned by curl_exec() or curl_multi_getcontent().
     * @return string|false Decoded (or untouched) body; false when $raw is not a
     *                      string or the gzip stream is corrupt.
     */
    private function decodeBody($raw){
        if (!is_string($raw)) {
            return false;
        }
        // Every gzip stream starts with the magic bytes 0x1f 0x8b.
        if (strncmp($raw, "\x1f\x8b", 2) !== 0) {
            return $raw;
        }

        return gzdecode($raw);
    }
}//class end
/**
 * Web-scraping helper: fetches page content over HTTP using cURL.
 */
// This file declares Reptile more than once. An unguarded redeclaration is a
// fatal "Cannot declare class" error, so this copy is only defined when no
// Reptile class exists yet. NOTE(review): the duplicate should be removed.
if (!class_exists('Reptile', false)) {
    class Reptile{
        /**
         * Fetch a single URL and return its body (one blocking request).
         *
         * The request advertises gzip support; the body is gunzipped only when it
         * really is gzip data, so servers that answer uncompressed still work.
         *
         * @param string $url   Address to fetch.
         * @param int    $proxy Pass 1 to route the request through the local proxy
         *                      at 127.0.0.1:9090; any other value connects directly.
         * @return string|false Response body, or false when the transfer failed or
         *                      the gzip payload could not be decoded.
         */
        public function getContent($url, $proxy = 0){
            $ch = $this->createHandle($url, $proxy);
            $contents = curl_exec($ch);
            curl_close($ch);

            return $this->decodeBody($contents);
        }

        /**
         * Fetch several URLs concurrently through one cURL multi handle.
         *
         * @param array $urlArr Map of key => URL; the keys are preserved in the result.
         * @param int   $proxy  Pass 1 to route every request through the local proxy
         *                      at 127.0.0.1:9090.
         * @return array Map of key => body converted to UTF-8 (source encoding is
         *               detected among UTF-8, GBK, GB2312 and BIG5). A transfer that
         *               failed yields an empty string for its key.
         */
        public function getContentByMulti($urlArr, $proxy = 0){
            $mh = curl_multi_init();
            $conn = array();
            foreach ($urlArr as $i => $url) {
                $conn[$i] = $this->createHandle($url, $proxy);
                curl_multi_add_handle($mh, $conn[$i]);
            }

            // Drive all transfers. curl_multi_select() blocks until there is socket
            // activity, so the loop no longer spins at 100% CPU while waiting.
            $active = 0;
            do {
                $status = curl_multi_exec($mh, $active);
                if ($active && $status === CURLM_OK && curl_multi_select($mh, 0.5) === -1) {
                    // select() may fail immediately when libcurl has no descriptors
                    // to wait on yet; back off briefly instead of busy-looping.
                    usleep(1000);
                }
            } while ($active && $status === CURLM_OK);

            $res = array();
            foreach ($conn as $i => $handle) {
                $body = $this->decodeBody(curl_multi_getcontent($handle));
                $res[$i] = ($body === false)
                    ? ''
                    : mb_convert_encoding($body, 'UTF-8', 'UTF-8,GBK,GB2312,BIG5');
                // Detach and release every easy handle, then the multi handle,
                // so nothing leaks across repeated calls.
                curl_multi_remove_handle($mh, $handle);
                curl_close($handle);
            }
            curl_multi_close($mh);

            return $res;
        }

        /**
         * Build a cURL easy handle with the options shared by both fetch methods.
         *
         * @param string $url   Address the handle will request.
         * @param int    $proxy 1 enables the local proxy at 127.0.0.1:9090.
         * @return resource|\CurlHandle Configured handle, not yet executed.
         */
        private function createHandle($url, $proxy){
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $url);
            // Return the body from curl_exec()/curl_multi_getcontent() instead of echoing it.
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
            curl_setopt($ch, CURLOPT_HTTPHEADER, array('Accept-Encoding:gzip'));
            // Loose comparison kept on purpose so callers passing '1' or true still enable the proxy.
            if ($proxy == 1) {
                curl_setopt($ch, CURLOPT_PROXY, '127.0.0.1:9090');
            }
            // NOTE(review): the leading ':' in the User-Agent looks like a copy/paste
            // slip; kept byte-for-byte because target sites may react to a change.
            curl_setopt($ch, CURLOPT_USERAGENT, ':Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36');
            // For pages behind HTTP authentication, additionally set
            // CURLOPT_HTTPAUTH (CURLAUTH_ANY) and CURLOPT_USERPWD on the handle.

            return $ch;
        }

        /**
         * Gunzip a response body only when it actually is gzip data.
         *
         * @param mixed $raw Value returned by curl_exec() or curl_multi_getcontent().
         * @return string|false Decoded (or untouched) body; false when $raw is not a
         *                      string or the gzip stream is corrupt.
         */
        private function decodeBody($raw){
            if (!is_string($raw)) {
                return false;
            }
            // Every gzip stream starts with the magic bytes 0x1f 0x8b.
            if (strncmp($raw, "\x1f\x8b", 2) !== 0) {
                return $raw;
            }

            return gzdecode($raw);
        }
    }//class end
}