/**
 * Fetch a URL with cURL and return the response body as a string.
 *
 * The transfer is limited to 5 seconds in total. The User-Agent and Referer
 * request headers are taken from the _USERAGENT_ and _REFERER_ constants when
 * they are defined; when they are not, the headers are simply not set.
 *
 * @param string $durl URL to request.
 * @return string|false Response body, or false when the cURL handle cannot be
 *                      created or the transfer fails.
 */
function curl_file_get_contents($durl){
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $durl);
    curl_setopt($ch, CURLOPT_TIMEOUT, 5);

    // Referencing an undefined constant is a fatal Error on PHP 8 (and a
    // bogus string value on older versions), so only use the constants when
    // the surrounding application actually defines them.
    if (defined('_USERAGENT_')) {
        curl_setopt($ch, CURLOPT_USERAGENT, _USERAGENT_);
    }
    if (defined('_REFERER_')) {
        curl_setopt($ch, CURLOPT_REFERER, _REFERER_);
    }

    // Return the body from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    $r = curl_exec($ch);

    // curl_close() has no effect since PHP 8.0 and is deprecated in 8.5;
    // it is only needed to release the handle on older versions.
    if (PHP_VERSION_ID < 80000) {
        curl_close($ch);
    }

    return $r;
}
// Example 1: download the ju.taobao.com "today items" page and print up to ten
// image/title pairs scraped from it.
$txt = file_get_contents('http://ju.taobao.com/tg/today_items.htm?spm=608.1000525.0.51&frontCatId=4000');
// file_get_contents() returns false when the download fails; continue with an
// empty page so the steps below simply find no matches.
if ($txt === false) {
    $txt = '';
}
// The page is assumed to be GBK-encoded; the patterns and output use UTF-8.
$txt = mb_convert_encoding($txt, "UTF-8", "GBK");

$tpic = '/<img width=\"285\" data-ks-lazyload=\"([^<>]+)\"\/>/isu';
$ttitle = '/<h3><a target=\"_blank\" title=\"([^<>]+)\" href/s';

// $match1[0] receives the image matches and $match1[1] the title matches.
// Start from an empty array so those indexes are guaranteed.
$match1 = array();
preg_match_all($tpic, $txt, $match1[]);
preg_match_all($ttitle, $txt, $match1[]);

// Capture group 1 of each pattern; empty when the pattern failed or matched nothing.
$pics = isset($match1[0][1]) ? $match1[0][1] : array();
$titles = isset($match1[1][1]) ? $match1[1][1] : array();

// Print at most ten rows, and never more than were actually found, so no
// undefined offsets are read. Images and titles are paired by position.
$rows = min(10, count($pics), count($titles));
for ($i = 0; $i < $rows; $i++) {
    // The values come from a remote page: escape them and quote the attribute.
    // double_encode is off because the scraped text may already contain entities.
    echo '图片:<img src="'.htmlspecialchars($pics[$i], ENT_QUOTES, 'UTF-8', false).'"><br>';
    echo '标题'.htmlspecialchars($titles[$i], ENT_QUOTES, 'UTF-8', false).'<br>';
}
// Example 2: download the video.baidu.com top list and print up to ten rows
// (rank, title, URL, view count) as an HTML table.
$contents = file_get_contents("http://video.baidu.com/top/");
// file_get_contents() returns false when the download fails; continue with an
// empty page so the table is printed with a header only.
if ($contents === false) {
    $contents = '';
}
// iconv() from gb2312 returns false (losing the whole page) as soon as it
// meets a byte sequence outside that charset. GBK is a superset of GB2312 and
// mb_convert_encoding() substitutes unmappable characters instead of failing.
$contents = mb_convert_encoding($contents, "UTF-8", "GBK");

$paiming = '/<span class=\"color-v6 sum\">(\d+)<\/span>/s'; // rank
$title = '/<span class=\"matter\" title=\"([^<>]+)\">/s'; // title
$url = '/<a statisic=\"name\" href=\'([^<>]+)\' class=\"block\" target=\"_blank\" >/s';
$num = '/<span class=\"color-v6 tr\">(\d+)<\/span>/s'; // view count

// $match[0..3] receive the rank, title, URL and view-count matches in that
// order. Start from an empty array so those indexes are guaranteed.
$match = array();
preg_match_all($paiming, $contents, $match[]);
preg_match_all($title, $contents, $match[]);
preg_match_all($url, $contents, $match[]);
preg_match_all($num, $contents, $match[]);

// Capture group 1 of each pattern; empty when the pattern failed or matched nothing.
$columns = array();
for ($c = 0; $c < 4; $c++) {
    $columns[$c] = isset($match[$c][1]) ? $match[$c][1] : array();
}

// Print at most ten rows, and never more than every column can supply, so no
// undefined offsets are read. Columns are paired by position.
$rowCount = min(10, count($columns[0]), count($columns[1]), count($columns[2]), count($columns[3]));

echo '<table><tr><td>排名</td><td>电影名称</td><td>网址</td><td>点击量</td></tr>';
for ($i = 0; $i < $rowCount; $i++) {
    // Rank and view count are digits only (\d+). Title and URL are free text
    // from a remote page, so escape them and quote the href attribute.
    // double_encode is off because the scraped text may already contain entities.
    $name = htmlspecialchars($columns[1][$i], ENT_QUOTES, 'UTF-8', false);
    $link = htmlspecialchars($columns[2][$i], ENT_QUOTES, 'UTF-8', false);
    echo '<tr><td>'.$columns[0][$i].'</td>'."\n";
    echo '<td>'.$name.'</td>'."\n";
    echo '<td><a href="'.$link.'" target="_blank">'.$link.'</a></td>'."\n";
    echo '<td>'.$columns[3][$i].'</td></tr>';
}
echo '</table>';
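// Article title: "php抓取网页内容" (fetching web page content with PHP)
// Blog page footer: "最新推荐文章于 2024-09-24 09:30:08 发布" (latest recommended article published 2024-09-24 09:30:08)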