使用 PHP cURL 抓取(采集)1688 商品信息

<?php
// Script bootstrap for the first demo: fetch one 1688 offer page and print it.

// Remove the execution time limit so a slow download is not aborted by PHP.
set_time_limit(0);
date_default_timezone_set('Asia/Shanghai');
// Report fatal run-time errors only; warnings and notices are not reported.
error_reporting(E_ERROR);
// Declare the response as UTF-8 HTML.
header('Content-type: text/html; charset=utf-8');

// Offer page to download; catchData() echoes the page itself.
$url = 'https://detail.1688.com/offer/623649788459.html?spm=a262gg.11982098.jl3f2q8t.2.714a442aN2DSNF&resourceId=648056&udsPoolId=772896';
catchData($url);
// Stop here: no top-level statement below this line is executed.
exit();
/**
 * Download a page with cURL, convert the body to UTF-8, print it and return it.
 *
 * The converted body is echoed as a side effect (the calling script relies on
 * that to display the page) and is also returned.
 *
 * @param string $url Absolute URL to fetch; it is also sent as the Referer header.
 * @return string The UTF-8 body, or an empty string when the transfer failed.
 */
function catchData($url) {
    $headers = array(
        "Accept: application/json, text/javascript, */*; q=0.01",
        "Content-Type: application/x-www-form-urlencoded; charset=UTF-8",
        "Origin:https://detail.1688.com",
        "Referer: $url",
        // A User-Agent supplied here replaces the one libcurl would generate,
        // so no separate CURLOPT_USERAGENT is set.
        "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36",
    );

    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);
    // Do not include the response headers in the returned data.
    curl_setopt($curl, CURLOPT_HEADER, 0);
    // Return the body as a string instead of printing it directly.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    // Ask for compressed responses and let cURL decode them.
    curl_setopt($curl, CURLOPT_ACCEPT_ENCODING, "gzip,deflate");
    // NOTE(review): certificate and host verification are switched off, as in
    // the original. This allows man-in-the-middle tampering; enable both once
    // a CA bundle is available on the host.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, FALSE);
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, FALSE);
    // The script runs with no PHP time limit, so bound the transfer here;
    // otherwise a stalled connection would hang forever.
    curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($curl, CURLOPT_TIMEOUT, 30);

    $data = curl_exec($curl);
    if ($data === false) {
        // Surface the transport error instead of silently producing an empty page.
        $error = curl_error($curl);
        curl_close($curl);
        echo '错误:' . $error;
        return '';
    }
    curl_close($curl);

    // The source encoding is detected among the listed candidates and the
    // body is converted to UTF-8 to match the declared response charset.
    $data = mb_convert_encoding($data, 'UTF-8', 'UTF-8,GBK,GB2312,BIG5');
    echo $data;
    return $data;
}

// Second demo: print whatever spider() returns.
// NOTE: the `exit;` earlier in this file stops the script before this point,
// so these statements only run if that `exit;` is removed.
// NOTE: spider() declares no parameters, so this $url is not passed to it.
$url = "https://detail.1688.com/offer/40009088544.html?spm=a262gg.11982098.jl3f2q8t.2.714a442aN2DSNF&resourceId=648056&udsPoolId=772896";
print spider();

 /**
  * Download the hard-coded 1688 offer page and return its body as UTF-8.
  *
  * Request headers come from header1() (randomised client-IP headers) plus a
  * Referer and a desktop Chrome User-Agent. Nothing is printed on success; the
  * caller decides what to do with the returned string.
  *
  * @return string The UTF-8 body, or an empty string when the transfer failed.
  */
 function spider(){
     $url = 'https://detail.1688.com/offer/40009088544.html?spm=a262gg.11982098.jl3f2q8t.2.714a442aN2DSNF&resourceId=648056&udsPoolId=772896';

     $header = header1();
     // Send the requested page as Referer, matching catchData(); the previous
     // value pointed at an unrelated site.
     $header[] = 'Referer: ' . $url;
     // Browser identification.
     $header[] = 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36';

     $ch = curl_init();
     curl_setopt($ch, CURLOPT_URL, $url);
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
     // NOTE(review): certificate and host verification are switched off, as in
     // the original; enable both once a CA bundle is available on the host.
     curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
     curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE);
     curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
     // To avoid IP bans when crawling repeatedly, a proxy can be configured
     // with CURLOPT_PROXY and CURLOPT_PROXYPORT.

     $output = curl_exec($ch);
     if ($output === false) {
         echo  '错误:'. __LINE__.'+++++++++++'.curl_error($ch);
         curl_close($ch);
         return '';
     }
     curl_close($ch);

     // Convert only when a non-UTF-8 encoding was positively identified;
     // mb_detect_encoding() returns false when no candidate matches, and that
     // value must not be passed on as a source encoding.
     $encode = mb_detect_encoding($output, array("ASCII",'UTF-8',"GB2312","GBK",'BIG5'));
     if ($encode !== false && $encode !== 'UTF-8') {
         $output = mb_convert_encoding($output, 'UTF-8', $encode);
     }

     return $output;
 }
 
 
/**
 * Build client-IP request headers carrying one randomly chosen IPv4 address.
 *
 * A range is picked at random from the table below, then one address inside
 * that range; the same address is used for all four headers. According to the
 * original author's note, the ranges are domestic (mainland China) addresses.
 *
 * @return array List of four "Name: value" strings, in the order CLIENT-IP,
 *               X-FORWARDED-FOR, VIA, REMOTE_ADDR.
 */
function header1(){
    // Inclusive [first, last] bounds. Addresses above 127.255.255.255 are
    // written as negative signed 32-bit integers; long2ip() maps them to the
    // dotted form shown next to each row.
    $ip_long = array(
        array(607649792, 608174079),     // 36.56.0.0-36.63.255.255
        array(1038614528, 1039007743),   // 61.232.0.0-61.237.255.255
        array(1783627776, 1784676351),   // 106.80.0.0-106.95.255.255
        array(2035023872, 2035154943),   // 121.76.0.0-121.77.255.255
        array(2078801920, 2079064063),   // 123.232.0.0-123.235.255.255
        array(-1950089216, -1948778497), // 139.196.0.0-139.215.255.255
        array(-1425539072, -1425014785), // 171.8.0.0-171.15.255.255
        array(-1236271104, -1235419137), // 182.80.0.0-182.92.255.255
        array(-770113536, -768606209),   // 210.25.0.0-210.47.255.255
        array(-569376768, -564133889),   // 222.16.0.0-222.95.255.255
    );

    // Derive the index bound from the table so adding or removing rows
    // cannot leave the random index out of step.
    $range = $ip_long[mt_rand(0, count($ip_long) - 1)];
    $ip = long2ip(mt_rand($range[0], $range[1]));

    $headers = array(
        'CLIENT-IP'       => $ip,
        'X-FORWARDED-FOR' => $ip,
        'VIA'             => $ip,
        'REMOTE_ADDR'     => $ip,
    );

    // Flatten to the "Name: value" form expected by CURLOPT_HTTPHEADER.
    $headerArr = array();
    foreach ($headers as $name => $value) {
        $headerArr[] = $name . ': ' . $value;
    }
    return $headerArr;
}

 

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 6
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 6
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值