/**
 * Fetch a URL with cURL and return the raw response, headers included.
 *
 * @param string $url Address to request.
 * @return string|null Response headers and body as a single string, or null
 *                     when the transfer failed, produced a falsy result, or
 *                     the server answered with HTTP 404.
 */
function curl_get($url) {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Prepend the response headers to the string returned by curl_exec().
    curl_setopt($ch, CURLOPT_HEADER, 1);
    $result = curl_exec($ch);
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    // Release the handle on every path, including the successful one.
    curl_close($ch);
    if ($code != '404' && $result) {
        return $result;
    }
    return null;
}
/**
 * Collect the href targets of the anchor tags found in a page.
 *
 * @param string $spider_page_result HTML source to scan.
 * @param string $base_url           Not used by this function; kept so
 *                                   existing callers keep working.
 * @return array|null href values in document order, or null when no anchor
 *                    with an href attribute was found.
 */
function get_page_urls($spider_page_result, $base_url) {
    // Capture group 1 is the href value: after an optional opening quote,
    // everything up to the next quote, whitespace or '>'.
    $anchor_pattern = '/<a\b[^>]*?\shref\s*=\s*["\']?([^"\'\s>]*)[^>]*>/i';
    $get_url_result = preg_match_all($anchor_pattern, $spider_page_result, $out);
    if ($get_url_result) {
        return $out[1];
    }
    // No match (0) and a PCRE failure (false) both mean "no links".
    return null;
}
/**
 * Fetch a page while presenting the request as a Baidu spider crawl.
 *
 * The request carries a Baiduspider User-Agent plus X-FORWARDED-FOR and
 * CLIENT-IP headers set to a fixed IP address. When the transfer fails, the
 * error text is echoed to the output and null is returned.
 *
 * @param string $url Address to request.
 * @return string|null Response body (without headers), or null on failure.
 */
function _GetContent( $url ){
    $ch = curl_init();
    $ip = '220.181.108.91'; // Address sent in the forwarded-IP headers (Baidu spider)
    $timeout = 15;          // Connect timeout, in seconds
    curl_setopt($ch,CURLOPT_URL,$url);
    // 0 leaves the overall transfer time unlimited; only the connect phase
    // is bounded by $timeout below.
    curl_setopt($ch,CURLOPT_TIMEOUT,0);
    // Forge the Baidu spider IP
    curl_setopt($ch,CURLOPT_HTTPHEADER,array('X-FORWARDED-FOR:'.$ip,'CLIENT-IP:'.$ip));
    // Forge the Baidu spider User-Agent
    curl_setopt($ch,CURLOPT_USERAGENT,"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)");
    curl_setopt($ch,CURLOPT_RETURNTRANSFER,1);
    curl_setopt($ch,CURLOPT_HEADER,0);
    curl_setopt($ch,CURLOPT_CONNECTTIMEOUT,$timeout);
    $content = curl_exec($ch);
    if ($content === false) {
        // Error 28 is cURL's timeout code; it gets a fixed message, every
        // other failure reports cURL's own error text.
        $no = curl_errno($ch);
        $error = ($no == 28) ? '访问目标地址超时' : curl_error($ch);
        // Read the error details before releasing the handle.
        curl_close($ch);
        echo $error;
        return null;
    }
    curl_close($ch);
    return $content;
}
/**
 * Send a form POST to $url and return the response body.
 *
 * Side effect: emits a content-type response header copied from the upstream
 * response. Unless nochange_url() reports that the content type matches the
 * global $nochange setting, the body is passed through change_link() and
 * regstr() before being returned.
 *
 * @param string $url  Address to post to; also sent as Referer and User-Agent.
 * @param array  $data Field name => value pairs for the request body.
 * @return string|false Response body, or whatever curl_exec() returned on
 *                      failure after the optional rewriting step.
 */
function post($url, $data = array())
{
    global $nochange;

    // Build "k1=v1&k2=v2". Values are sent exactly as given; no URL-encoding
    // is applied here.
    $pairs = array();
    foreach ($data as $k => $v) {
        $pairs[] = "$k=" . $v;
    }
    $data = implode('&', $pairs);

    $ch = curl_init();
    // Certificate and host verification are switched off for this request.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_ENCODING, 'gzip,deflate');
    curl_setopt($ch, CURLOPT_REFERER, $url);
    curl_setopt($ch, CURLOPT_USERAGENT, $url);
    curl_setopt($ch, CURLOPT_HEADER, false);
    curl_setopt($ch, CURLOPT_HTTPHEADER, array('CLIENT-IP:' . get_rand_ip(), 'X-FORWARDED-FOR:' . get_rand_ip()));
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
    $rs = curl_exec($ch);
    $info = curl_getinfo($ch);
    curl_close($ch);

    $content_type = $info['content_type'];
    header('content-type:' . $content_type);
    // Use the $nochange global declared above, as get() does, rather than an
    // undefined variable.
    if (nochange_url($content_type, $nochange) < 1) {
        $rs = change_link($rs);
        $rs = regstr($rs);
    }
    return $rs;
}
/**
 * Pick the crawler User-Agent string for a configured engine and client type.
 *
 * @param mixed $engine 'baidu', 'google', 'yahoo' or 'bing'; anything else
 *                      yields the desktop Baiduspider string.
 * @param mixed $client 'mobile' forces the mobile variant, 'auto' uses it
 *                      when is_mobile() is true; every other value yields
 *                      the desktop variant.
 * @return string User-Agent header value.
 */
function _spider_user_agent($engine, $client)
{
    $baidu_pc = 'Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)';

    switch ($engine) {
        case 'baidu':
            $pc = $baidu_pc;
            $mobile = 'Mozilla/5.0 (Linux;u;Android 4.2.2;zh-cn;) AppleWebKit/534.46 (KHTML,like Gecko) Version/5.1 Mobile Safari/10600.6.3 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)';
            break;
        case 'google':
            $pc = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)';
            $mobile = 'Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)';
            break;
        case 'yahoo':
            // Only one variant exists for this engine.
            return 'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)';
        case 'bing':
            $pc = 'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)';
            $mobile = 'Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; bingbot/2.0; http://www.bing.com/bingbot.htm)';
            break;
        default:
            return $baidu_pc;
    }

    $wants_mobile = $client == "mobile" || ($client == "auto" && is_mobile());
    return $wants_mobile ? $mobile : $pc;
}

/**
 * Fetch $url (from cache when available) and return the response body.
 *
 * Side effect: emits a content-type response header. On a cache miss the page
 * is fetched with cURL when it is available and $user_curl is "1", otherwise
 * with file_get_contents(). Unless nochange_url() matches the content type
 * against $nochange or the image list "jpg|jpeg|gif|png|bmp", the body is
 * passed through change_link() and regstr(). Responses that do not match the
 * image list are written to the cache.
 *
 * The $user_agent global is only read here, never overwritten, so repeated
 * calls resolve the same User-Agent.
 *
 * @param string $url Address to fetch; also sent as the Referer.
 * @return mixed Response body (possibly rewritten), or the cached body.
 */
function get($url)
{
    global $user_curl, $user_agent, $user_client, $nochange;

    $cache = cache('r', $url);
    if ($cache) {
        // Read the two expected keys explicitly instead of extract()-ing
        // arbitrary cached keys into the local scope.
        $content_type = isset($cache['content_type']) ? $cache['content_type'] : '';
        $rs = isset($cache['rs']) ? $cache['rs'] : null;
        header('content-type:' . $content_type);
        return $rs;
    }

    $content_type = '';
    if (function_exists('curl_init') && $user_curl == "1") {
        $ch = curl_init();
        // Certificate and host verification are switched off for this request.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE);
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_ENCODING, 'gzip,deflate');
        curl_setopt($ch, CURLOPT_REFERER, $url);
        curl_setopt($ch, CURLOPT_USERAGENT, _spider_user_agent($user_agent, $user_client));
        curl_setopt($ch, CURLOPT_HEADER, false);
        curl_setopt($ch, CURLOPT_HTTPHEADER, array('CLIENT-IP:' . get_rand_ip(), 'X-FORWARDED-FOR:' . get_rand_ip()));
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        $rs = curl_exec($ch);
        $info = curl_getinfo($ch);
        curl_close($ch);
        $content_type = $info['content_type'];
    } else {
        $rs = file_get_contents($url);
        // The HTTP stream wrapper leaves the response headers in
        // $http_response_header; take the last Content-Type so that the
        // final response wins when redirects were followed.
        if (isset($http_response_header) && is_array($http_response_header)) {
            foreach ($http_response_header as $header_line) {
                if (stripos($header_line, 'content-type:') === 0) {
                    $content_type = trim(substr($header_line, strlen('content-type:')));
                }
            }
        }
    }

    $not_image = nochange_url($content_type, "jpg|jpeg|gif|png|bmp") < 1;
    if (nochange_url($content_type, $nochange) < 1 && $not_image) {
        $rs = change_link($rs);
        $rs = regstr($rs);
    }
    if ($not_image) {
        cache('w', $url, array('content_type' => $content_type, 'rs' => $rs));
    }
    header('content-type:' . $content_type);
    return $rs;
}