<?php
// Script bootstrap: configure the runtime, fetch one offer page, then stop.

// 0 removes PHP's execution time limit for this script.
set_time_limit(0);
// Default time zone used by the date/time functions.
date_default_timezone_set("Asia/Shanghai");
// Report fatal run-time errors only; warnings and notices are not reported.
error_reporting(E_ERROR);
// Declare the response as UTF-8 encoded HTML.
header("Content-type: text/html; charset=utf-8");
// Offer detail page on detail.1688.com to download.
$url = 'https://detail.1688.com/offer/623649788459.html?spm=a262gg.11982098.jl3f2q8t.2.714a442aN2DSNF&resourceId=648056&udsPoolId=772896';
// catchData() prints the downloaded page itself; its return value is not used here.
catchData($url);
// Execution ends here: top-level statements further down the file are never run
// (the function declarations below are still available to the call above).
exit;
/**
 * Download a page with cURL, convert its body to UTF-8, print it and return it.
 *
 * The request carries browser-like headers and uses the requested URL as its
 * Referer. Compressed (gzip/deflate) responses are decoded by cURL.
 *
 * NOTE(review): TLS peer and host verification are switched off, exactly as in
 * the original code, so the remote server's identity is not checked. Enable
 * them once a CA bundle is available on the host running this script.
 *
 * @param string $url Absolute URL of the page to fetch.
 * @return string|false The body converted to UTF-8; an empty string when the
 *                      transfer fails (the cURL error text is printed instead);
 *                      false only if mb_convert_encoding() itself fails.
 */
function catchData($url) {
    $headers = array(
        "Accept: application/json, text/javascript, */*; q=0.01",
        "Content-Type: application/x-www-form-urlencoded; charset=UTF-8",
        "Origin:https://detail.1688.com",
        "Referer: $url",
        "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36",
    );

    $curl = curl_init();
    if ($curl === false) {
        echo '错误:curl_init failed';
        return '';
    }

    // URL to fetch.
    curl_setopt($curl, CURLOPT_URL, $url);
    // Custom request headers. The User-Agent line above is the one that is
    // sent, so the former (conflicting) CURLOPT_USERAGENT setting was dropped.
    curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);
    // Do not include the response headers in the returned data.
    curl_setopt($curl, CURLOPT_HEADER, 0);
    // Return the body as a string instead of printing it directly.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    // Advertise and transparently decode gzip/deflate responses.
    curl_setopt($curl, CURLOPT_ACCEPT_ENCODING, "gzip,deflate");
    // HTTPS: skip certificate and host-name verification (see NOTE above).
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, FALSE);
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, FALSE);

    // Run the transfer.
    $data = curl_exec($curl);
    if ($data === false) {
        // Surface the failure instead of silently converting `false`.
        echo '错误:' . curl_error($curl);
        curl_close($curl);
        return '';
    }
    curl_close($curl);

    // The source encoding is auto-detected from this candidate list.
    $data = mb_convert_encoding($data, 'UTF-8', 'UTF-8,GBK,GB2312,BIG5');
    echo $data;
    return $data;
}
// NOTE(review): the script calls exit earlier in this file, so the two
// statements below are never executed.
// This global $url is not passed to spider(), which takes no arguments and
// assigns its own local $url.
$url = 'https://detail.1688.com/offer/40009088544.html?spm=a262gg.11982098.jl3f2q8t.2.714a442aN2DSNF&resourceId=648056&udsPoolId=772896';
echo spider();
/**
 * Download the hard-coded 1688 offer page, print it, and return the
 * JSON-decoded body.
 *
 * The request headers come from header1() (spoofed client-IP headers) plus a
 * fixed Referer and a desktop Chrome User-Agent. The raw body is printed,
 * followed by the detected encoding name (diagnostic output kept from the
 * original implementation). The body is converted to UTF-8 when another
 * encoding is detected and then passed to json_decode().
 *
 * NOTE(review): TLS peer and host verification are switched off, exactly as in
 * the original code, so the remote server's identity is not checked.
 *
 * @return mixed Result of json_decode($body, true); null when the body is not
 *               valid JSON or when the transfer fails (the cURL error is
 *               printed in that case).
 */
function spider(){
    // Spoofed client-IP headers, then referer and browser identification.
    $header = header1();
    $header[] = 'Referer: https://mobile.yangkeduo.com';
    $header[] = 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36';

    $url = 'https://detail.1688.com/offer/40009088544.html?spm=a262gg.11982098.jl3f2q8t.2.714a442aN2DSNF&resourceId=648056&udsPoolId=772896';

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // HTTPS: skip certificate and host-name verification (see NOTE above).
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    // A proxy can be configured here (CURLOPT_PROXY / CURLOPT_PROXYPORT) to
    // avoid the client IP being banned after repeated requests.

    $output = curl_exec($ch);
    if ($output === false) {
        echo '错误:'. __LINE__.'+++++++++++'.curl_error($ch);
        curl_close($ch);
        return null;
    }
    curl_close($ch);
    echo $output;

    // Detect the body encoding; the name is printed as diagnostic output.
    $encode = mb_detect_encoding($output, array("ASCII",'UTF-8',"GB2312","GBK",'BIG5'));
    echo $encode;
    if ($encode === 'UTF-8') {
        echo $encode;
    } elseif ($encode !== false) {
        // Only convert when detection succeeded; `false` is not a valid
        // source encoding for mb_convert_encoding().
        $output = mb_convert_encoding($output, 'UTF-8', $encode);
    }

    // Previously computed but discarded; hand the decoded data to the caller.
    return json_decode($output, true);
}
/**
 * Build request headers that all carry one randomly chosen IPv4 address.
 *
 * A range is picked at random from the table below, then a random address
 * inside that range. According to the original author's note the ranges are
 * domestic (mainland China) address blocks.
 *
 * @return array List of four "Name: value" header lines, in this order:
 *               CLIENT-IP, X-FORWARDED-FOR, VIA, REMOTE_ADDR — all with the
 *               same dotted-quad address.
 */
function header1(){
    // Inclusive [first, last] address pairs in integer form. Addresses from
    // 128.0.0.0 upwards are written as negative (signed 32-bit) values.
    $ip_long = array(
        array(607649792, 608174079), //36.56.0.0-36.63.255.255
        array(1038614528, 1039007743), //61.232.0.0-61.237.255.255
        array(1783627776, 1784676351), //106.80.0.0-106.95.255.255
        array(2035023872, 2035154943), //121.76.0.0-121.77.255.255
        array(2078801920, 2079064063), //123.232.0.0-123.235.255.255
        array(-1950089216, -1948778497), //139.196.0.0-139.215.255.255
        array(-1425539072, -1425014785), //171.8.0.0-171.15.255.255
        array(-1236271104, -1235419137), //182.80.0.0-182.92.255.255
        array(-770113536, -768606209), //210.25.0.0-210.47.255.255
        array(-569376768, -564133889), //222.16.0.0-222.95.255.255
    );

    // Derive the upper index from the table so adding a range needs no other change.
    $range = $ip_long[mt_rand(0, count($ip_long) - 1)];
    $ip = long2ip(mt_rand($range[0], $range[1]));

    $headers = array(
        'CLIENT-IP' => $ip,
        'X-FORWARDED-FOR' => $ip,
        'VIA' => $ip,
        'REMOTE_ADDR' => $ip,
    );

    // Flatten the name => value map into "Name: value" lines for CURLOPT_HTTPHEADER.
    $headerArr = array();
    foreach ($headers as $n => $v) {
        $headerArr[] = $n . ': ' . $v;
    }
    return $headerArr;
}