用 PHP 做采集时,有些网站会做防采集处理,尤其是针对图片的采集。这时需要在 cURL 请求中伪造请求来源(Referer)。下面是一个封装好的 cURL 方法,直接使用应该可以解决一部分问题:
/**
 * Fetch a URL with an HTTP GET request via cURL, sending a forged Referer.
 *
 * The Referer is set to the directory of the requested resource on the same
 * origin (for example "http://host/a/b.jpg" yields "http://host/a/"), which
 * is what some hotlink-protection checks look for. A browser-like User-Agent
 * is sent, and cookies are read from and written to "cookie.txt", resolved
 * against the current working directory.
 *
 * Redirects are not followed. TLS peer and host verification are disabled.
 *
 * @param string $url Absolute URL to fetch.
 *
 * @return string|false The response body, or false if the handle could not
 *                      be created or the transfer failed.
 */
function CurlGet($url)
{
    // Derive the Referer from the URL's components rather than treating the
    // whole URL as a filesystem path: pathinfo() turns "http://host" into
    // "http:" and is confused by "/" characters inside a query string.
    $parts = parse_url($url);
    if (is_array($parts) && isset($parts['scheme'], $parts['host'])) {
        $refer = $parts['scheme'] . '://' . $parts['host'];
        if (isset($parts['port'])) {
            $refer .= ':' . $parts['port'];
        }
        // Keep everything up to and including the last "/" of the path,
        // ignoring trailing slashes; fall back to the site root.
        $path = isset($parts['path']) ? rtrim($parts['path'], '/') : '';
        $lastSlash = strrpos($path, '/');
        $refer .= ($lastSlash === false) ? '/' : substr($path, 0, $lastSlash + 1);
    } else {
        // Unparseable or relative URL: keep the previous best-effort value.
        $refer = dirname($url) . '/';
    }

    $curl = curl_init($url);
    if ($curl === false) {
        return false;
    }

    // The key step: forge the request's source address.
    curl_setopt($curl, CURLOPT_REFERER, $refer);
    curl_setopt($curl, CURLOPT_HTTPGET, true);
    curl_setopt($curl, CURLOPT_HEADER, false);
    // Pretend to be a browser.
    curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 6.0; SeaPort/1.2; Windows NT 5.1; SV1; InfoPath.2)');
    curl_setopt($curl, CURLOPT_COOKIEJAR, 'cookie.txt');
    curl_setopt($curl, CURLOPT_COOKIEFILE, 'cookie.txt');
    // Return the body from curl_exec() instead of printing it. The raw
    // bytes are returned as-is, so CURLOPT_BINARYTRANSFER is not needed.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, false);
    // NOTE(review): certificate and host-name checks are switched off, so
    // HTTPS responses are not authenticated. Kept as in the original.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 0);
    // Connection-phase timeout in seconds. No overall transfer timeout
    // (CURLOPT_TIMEOUT) is set, so a stalled transfer is not bounded here.
    curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 30);

    $values = curl_exec($curl);

    // curl_close() has no effect from PHP 8.0 on (the handle is released
    // when it goes out of scope); it is only needed on older runtimes.
    if (PHP_VERSION_ID < 80000) {
        curl_close($curl);
    }

    return $values;
}