// Entry point: scrape the first feed and tag its articles with category id 1.
getinfo("http://rss.sina.com.cn/rollnews/news/gn_total.js", 1);

/*
 * Additional feeds and their category ids, currently disabled:
 *
 * getinfo("http://rss.sina.com.cn/rollnews/news/gj_total.js", 2);
 * getinfo("http://rss.sina.com.cn/rollnews/news/sh_total.js", 3);
 * getinfo("http://rss.sina.com.cn/rollnews/sports/sports_total.js", 4);
 * getinfo("http://rss.sina.com.cn/rollnews/tech/tech1_total.js", 5);
 * getinfo("http://rss.sina.com.cn/rollnews/finance/finance1_news_total.js", 6);
 * getinfo("http://rss.sina.com.cn/rollnews/ent/ent_total.js", 7);
 * getinfo("http://rss.sina.com.cn/rollnews/jczs/jczs_total.js", 8);
 */

/**
 * Download a feed script, collect every `title:"..."` and `link:"..."` value
 * found in it, then fetch each linked article and print its title and
 * cleaned body as HTML.
 *
 * Feed text and article bodies are converted from GBK to UTF-8 before output.
 *
 * @param string $infourl URL of the feed script to scan.
 * @param int    $catid   Category id; only used by the disabled insertdb() call.
 * @return void
 */
function getinfo($infourl, $catid)
{
    $pagecontent = getwebcontent($infourl);
    if (!is_string($pagecontent) || $pagecontent === '') {
        // The download failed or returned nothing, so there is nothing to parse.
        return;
    }

    preg_match_all('/title:"(.*?)"/', $pagecontent, $match);
    $titlearr = $match[1];
    preg_match_all('/link:"(.*?)"/', $pagecontent, $match);
    $urlarr = $match[1];

    // NOTE(review): link $i is paired with title $i-1 and link 0 is skipped,
    // exactly as in the original. Presumably the feed starts with one extra
    // link that has no matching quoted title -- verify against a live feed.
    $total = count($urlarr);
    for ($i = 1; $i < $total; $i++) {
        // Guard the title lookup: the two lists are extracted independently
        // and need not have matching lengths.
        $rawtitle = isset($titlearr[$i - 1]) ? $titlearr[$i - 1] : '';

        $title   = iconv("gbk", "utf-8", $rawtitle);
        $content = iconv("gbk", "utf-8", getnewscontent($urlarr[$i]));

        // NOTE(review): both values come from a remote site and are echoed
        // as raw HTML; sanitize them before showing this page to real users.
        echo $title . "<br><br>";

        // The original passed $content through mysql_escape_string() here.
        // That function was removed in PHP 7.0, and SQL escaping does not
        // belong in HTML output; do it inside insertdb() with a prepared
        // statement instead.
        echo $content . "<br><br><hr><br><br>";

        // Persistence hook, disabled until insertdb() is implemented:
        // if (!insertdb($title, $content, $catid)) break;
    }
}

/**
 * Placeholder for storing one article in the database.
 *
 * The body is intentionally empty: write the data into your own database
 * here. The disabled call in getinfo() treats a falsy return value as a
 * signal to stop, so an implementation should return true on success.
 *
 * @param string $title   Article title (UTF-8).
 * @param string $content Article body HTML (UTF-8).
 * @param int    $catid   Category id of the feed the article came from.
 * @return mixed Currently returns nothing (null).
 */
function insertdb($title, $content, $catid)
{
    // Write the data into your database.
}

/**
 * Download an article page and return the HTML of its body container with
 * links, the "otherContent_01" block, the video block and the clearing
 * spacer div removed.
 *
 * @param string $newsurl URL of the article page.
 * @return string Cleaned body HTML in the page's original encoding, or an
 *                empty string when the page could not be fetched or the
 *                body container was not found.
 */
function getnewscontent($newsurl)
{
    $newscontent = getwebcontent($newsurl);
    if (!is_string($newscontent)) {
        return '';
    }

    // The body is everything between the "artibody" container's opening tag
    // and the publish_helper_end marker comment.
    $bodypattern = '/<div class="blkContainerSblkCon" id="artibody">([\s\S]*?)<!-- publish_helper_end -->/';
    if (!preg_match($bodypattern, $newscontent, $match)) {
        // Without this check a non-matching page raised an undefined-index
        // warning and then ran the clean-up on null.
        return '';
    }
    $content = $match[1];

    // Remove every anchor element together with its text.
    $content = preg_replace('/<a.*?<\/a>/si', '', $content);
    // Remove the "otherContent_01" block.
    $content = preg_replace('/<div style="overflow:hidden;zoom:1;" class="otherContent_01">.*?<\/div>/si', '', $content);
    // Remove the video block up to and including its trailing clearing div.
    $content = preg_replace('/<div class="blk-video">.*?<div class="clearcl"><\/div>/si', '', $content);
    // Remove the spacer div. "hiddden" is spelled as in the original literal,
    // presumably to match the target page's markup -- verify.
    $content = str_replace('<div style="clear:both;height:0;visibility:hiddden;overflow:hidden;"></div>', '', $content);

    return $content;
}

/**
 * Perform an HTTP GET through a proxy with cURL and return the response body.
 *
 * Redirects are followed, response headers are excluded from the result and
 * the whole transfer is limited to 120 seconds.
 *
 * @param string $url        URL to download.
 * @param string $user_agent Value for the User-Agent request header.
 * @param string $proxy      Proxy to route the request through.
 * @return string|false Response body, or false when the transfer failed.
 */
function curl_string($url, $user_agent, $proxy)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_PROXY, $proxy);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_TIMEOUT, 120);
    $result = curl_exec($ch);
    curl_close($ch);

    return $result;
}

/**
 * Fetch a page's source through the proxy server.
 *
 * Convenience wrapper around curl_string() that supplies a default
 * User-Agent and proxy address.
 *
 * @param string $url        URL to download.
 * @param string $user_agent User-Agent header value.
 * @param string $proxy      Proxy address; the default points at a host on a
 *                           private network and will need changing elsewhere.
 * @return string|false Response body, or false when the transfer failed.
 */
function getwebcontent($url, $user_agent = "Mozilla/5.0", $proxy = "http://192.168.0.101:3128")
{
    // This function used to repeat curl_string() line for line; delegating
    // keeps the request options in one place.
    return curl_string($url, $user_agent, $proxy);
}