06 | curl_setopt( $ch , CURLOPT_URL, "http://blog.snsgou.com" ); |
07 | curl_setopt( $ch , CURLOPT_RETURNTRANSFER, 1); |
08 | curl_setopt( $ch , CURLOPT_HEADER, 0); |
11 | $output = curl_exec( $ch ); |
关于 curl_setopt 中的 CURLOPT_URL、CURLOPT_RETURNTRANSFER 等参数,请参考 PHP 官方文档手册,里面有详细说明!
现在已经得到了 $output 的内容,接下来使用正则表达式匹配出你需要的内容即可。
07 | public $pagestring = '' ; |
10 | function __construct() { |
15 | function getUrlFile( $url ) { |
18 | if ( extension_loaded ( 'curl' )) { |
20 | curl_setopt( $ch , CURLOPT_URL, $url ); |
21 | curl_setopt( $ch , CURLOPT_RETURNTRANSFER, 1); |
22 | curl_setopt( $ch , CURLOPT_FOLLOWLOCATION, 1); |
23 | curl_setopt( $ch , CURLOPT_HEADER, 0); |
24 | $content = curl_exec( $ch ); |
27 | $content = file_get_contents ( $url ); |
29 | return trim( $content ); |
32 | function get_all_url( $code ) { |
33 | preg_match_all( '/<a.+?href=["|\']?([^>"\' ]+)["|\']?\s*[^>]*>([^>]+)<\/a>/is' , $code , $arr ); |
34 | return array ( 'name' => $arr [2], 'url' => $arr [1]); |
37 | function get_sub_content( $str , $start , $end ) { |
38 | $start = trim( $start ); |
40 | if ( $start == '' || $end == '' ) { |
43 | $str = explode ( $start , $str ); |
44 | $str = explode ( $end , $str [1]); |
49 | echo "<div style=\"border:1px solid #ddd;background:#F7F7F7;padding:5px 10px;\">\r\n" ; |
50 | echo "<pre style=\"font-family:Arial,Vrinda;font-size:14px;\">\r\n" ; |
52 | echo "\r\n</pre>\r\n" ; |
02 | define( 'ROOT_PATH' , str_replace ( '\\' , '/' , dirname( __FILE__ ))); |
05 | header( "Content-type: text/html; charset=gb2312" ); |
07 | $url = 'http://news.163.com/special/00013C0O/guojibjtj_03.html' ; |
09 | $gather = new Gather(); |
11 | $html = $gather ->getUrlFile( $url ); |
13 | $start = '<div class="bd clearfix">' ; |
14 | $end = '<div class="pages-1 mt25">' ; |
16 | $code = $gather ->get_sub_content( $html , $start , $end ); |
17 | $newsAry = $gather ->get_all_url( $code ); |
19 | $gather ->vd( $newsAry ); |
21 | $tarGetUrl = $newsAry [ 'url' ][0]; |
23 | $html = $gather ->getUrlFile( $tarGetUrl ); |
25 | $start = '<div id="endText">' ; |
26 | $end = '<span class="cDGray right" style="white-space:nowrap;">' ; |
28 | $code = $gather ->get_sub_content( $html , $start , $end ); |
29 | $killHtml = '<iframe src="http://g.163.com/r?site=netease&affiliate=news&cat=article&type=tvscreen200x300&location=1" width="200" height="300" frameborder="no" border="0" marginwidth="0" marginheight="0" scrolling="no"></iframe>' ; |
30 | $killHtml2 = '<a href="http://news.163.com/"><img src="http://img1.cache.netease.com/cnews/img07/end_i.gif" alt="netease" width="12" height="11" border="0" class="icon" /></a>' ; |
31 | $code = str_replace ( $killHtml , "" , $code ); |
32 | $code = str_replace ( $killHtml2 , "" , $code ); |