用PHP的 curl 抓取网页时报错:
The server understood the request, but is refusing to fulfill it because access is forbidden to the requested resource.
原来的抓取函数是这样的:
/**
 * Perform an HTTP GET or POST request with cURL and return the response body.
 *
 * This version never sets CURLOPT_USERAGENT, so no explicit User-Agent is
 * configured for the request.
 *
 * @param string $url    Target URL.
 * @param string $method Request method, compared case-insensitively. Any value
 *                       other than "GET" is sent as a POST.
 * @param string $data   Request body handed to CURLOPT_POSTFIELDS; only used
 *                       for non-GET requests.
 *
 * @return string|false Response body on success, false when curl_exec() fails.
 */
public static function httpRequest($url, $method = 'GET', $data = '')
{
    $ch = curl_init();

    // NOTE(review): peer and host-name verification are switched off, which
    // exposes HTTPS requests to man-in-the-middle attacks. Left unchanged for
    // backward compatibility; enable verification wherever possible.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);

    // Normalise the case first: the documented values are "post/get", and a
    // case-sensitive comparison would turn a lowercase 'get' into a POST.
    if (strtoupper((string) $method) !== 'GET') {
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
        // To send a JSON body, additionally set CURLOPT_HTTPHEADER with
        // 'Content-Type: application/json;charset=utf-8'.
    }

    // Return the response as a string instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_URL, $url);

    $result = curl_exec($ch);
    curl_close($ch);

    return $result;
}
改成模拟浏览器的方式就ok了
/**
 * Perform an HTTP GET or POST request with cURL, identifying as a browser,
 * and return the response body.
 *
 * A browser-like User-Agent is sent with every request; presumably this is
 * what lets the request through on servers that refuse clients without one.
 *
 * @param string $url    Target URL.
 * @param string $method Request method, compared case-insensitively. Any value
 *                       other than "GET" is sent as a POST.
 * @param string $data   Request body handed to CURLOPT_POSTFIELDS; only used
 *                       for non-GET requests.
 *
 * @return string|false Response body on success, false when curl_exec() fails.
 */
public static function httpRequest($url, $method = 'GET', $data = '')
{
    $ch = curl_init();

    // NOTE(review): peer and host-name verification are switched off, which
    // exposes HTTPS requests to man-in-the-middle attacks. Left unchanged for
    // backward compatibility; enable verification wherever possible.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);

    // Normalise the case first: the documented values are "post/get", and a
    // case-sensitive comparison would turn a lowercase 'get' into a POST.
    if (strtoupper((string) $method) !== 'GET') {
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
        // To send a JSON body, additionally set CURLOPT_HTTPHEADER with
        // 'Content-Type: application/json;charset=utf-8'.
    }

    // Return the response as a string instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_URL, $url);

    // Present the request as coming from a desktop browser.
    $user_agent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.1.4322)";
    curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);

    $result = curl_exec($ch);
    curl_close($ch);

    return $result;
}