抓取1688产品的代码


<?php
// Product detail page to scrape. The request is routed through x.php on
// localhost (presumably a local fetch proxy - TODO confirm).
$f_url = "http://detail.1688.com/offer/36618237146.html";
$url = "http://127.0.0.1/test/dom/x.php?url=".$f_url;

require("simple_html_dom.php");

$html = file_get_html($url);
// file_get_html() gives back a falsy value when the page cannot be loaded;
// stop here instead of dying on the first ->find() call below.
if(!$html){
	exit("failed to load ".$url);
}

// Title: text of the first <h1>, skipped when the page has none.
$title_element = $html->find('h1',0);
if($title_element){
	echo $title_element->plaintext;
}
echo "<br/>";

// Prices: one table row per tier, each carrying a quantity and a price cell.
$price_element = $html->find(".table-wrap .de-price-hd  tr");
foreach($price_element as $element){
	// Look each cell up once; find(..., 0) returns null when nothing matches.
	$qty_node = $element->find(".de-price-amount > span",0);
	$price_node = $element->find(".reality-reveal .de-pnum-ep",0);

	$qty = $qty_node ? (string)$qty_node->plaintext : "";
	$price = $price_node ? (string)$price_node->plaintext : "";

	// Compare against "" explicitly so a literal "0" is still reported.
	if($qty !== "" && $price !== ""){
		echo "qty:".$qty."----price:".$price."<br/>";
	}
}

/**
 * Extracts the value stored under the "original" key of a raw JSON-like
 * attribute string.
 *
 * The value is located textually: it is everything between the marker
 * `original":"` and the next occurrence of `"}`.
 *
 * @param string $attr Raw attribute text, e.g. {"preview":"...","original":"..."}.
 * @return string The extracted value, or "" when either delimiter is missing.
 */
function getsubstr($attr){
	$marker = 'original":"';
	$attr = (string)$attr;

	$start = strpos($attr,$marker);
	if($start === false){
		// Without this guard, false + 11 would silently slice from offset 11
		// and return unrelated text.
		return "";
	}

	$str = substr($attr,$start+strlen($marker));
	$end = strpos($str,'"}');
	if($end === false){
		return "";
	}
	return substr($str,0,$end);
}

// Gallery images at the top of the page: each trigger element carries a
// data-imgs attribute from which the "original" image URL is extracted.
$img_array = array();
$top_image_element = $html->find(".tab-content-container .fd-clr .tab-trigger");
foreach($top_image_element as $element){
	$attr = $element->attr;
	$raw = isset($attr['data-imgs']) ? $attr['data-imgs'] : "";

	// Extract once and reuse for both the output and the download list.
	$img_src = (string)getsubstr($raw);
	echo $img_src."<br>";
	if($img_src !== ""){
		$img_array[] = $img_src;
	}
}

// Colours: printed from the title attribute of each spec link.
$color_elements = $html->find(".content-wrapper .content .leading  .value .unit-detail-spec-operator a");
foreach($color_elements as $element){
	$attr = $element->attr;
	echo (isset($attr['title']) ? $attr['title'] : "")."<br/>";
}

// Sizes: printed from the text of each name cell.
$size_elements = $html->find(".content-wrapper .content .list table tr td.name span");
foreach($size_elements as $element){
	echo $element->plaintext."<br>";
}

// Detail images: the description container points (data-tfs-url) at a
// separate document, which is fetched and scanned for <img> tags.
$desc_container = $html->find(".de-description-detail #desc-lazyload-container",0);
$desc_url = "";
if($desc_container){
	$attr = $desc_container->attr;
	if(isset($attr['data-tfs-url'])){
		$desc_url = $attr['data-tfs-url'];
	}
}
echo $desc_url."<br>";

// Only fetch when a URL was found; file_get_html() is falsy on failure.
$html2 = ($desc_url !== "") ? file_get_html($desc_url) : false;
if($html2){
	foreach($html2->find("img") as $e){
		echo $e->src."<br/>";
		// Skip <img> tags without a usable src so no empty URL is queued.
		if($e->src){
			$img_array[] = $e->src;
		}
	}
}

// Download every collected image into ./img/, sending a forged Referer so the
// request looks like it originates from the product page.
$reffer = "http://detail.1688.com";
$img_dir = "./img";
if(!is_dir($img_dir) && !mkdir($img_dir,0777,true)){
	exit("cannot create directory ".$img_dir);
}

foreach($img_array as $img_url){
	// Protocol-relative URLs (//host/path) cannot be fetched as-is.
	if(strpos($img_url,"//") === 0){
		$img_url = "http:".$img_url;
	}

	// File name = last path segment, without any query string.
	$path = parse_url($img_url,PHP_URL_PATH);
	$name = $path ? basename($path) : "";
	if($name === ""){
		continue;
	}

	$img = getremotefile($img_url,$reffer);
	if($img === false){
		// Report the failure instead of writing an empty file.
		echo "download failed: ".$img_url."<br/>";
		continue;
	}

	// file_put_contents() opens, writes and closes in one call, so no file
	// handle is left open between iterations.
	if(file_put_contents($img_dir."/".$name,$img) === false){
		echo "write failed: ".$img_dir."/".$name."<br/>";
	}
}
/**
 * Fetches the contents of a URL, optionally sending a Referer header.
 *
 * @param string $url   Address to read.
 * @param string $refer Value for the Referer header; no header is sent when
 *                      this is empty.
 * @return string|false The fetched contents, or false when the read fails
 *                      (the return value of file_get_contents()).
 */
function getremotefile($url, $refer = '') {
	$http = array();

	// Drop CR/LF so the value cannot break out of its header line.
	$refer = str_replace(array("\r", "\n"), '', (string)$refer);
	if($refer !== ''){
		$http['header'] = "Referer: ".$refer."\r\n";
	}

	$context = stream_context_create(array('http' => $http));
	return file_get_contents($url, false, $context);
}

?>





评论 6
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值