PHP获取网页内容的几种方法

最新推荐文章于 2024-06-26 15:22:47 发布

wakice

最新推荐文章于 2024-06-26 15:22:47 发布

阅读量1.1w

点赞数 3

 
      方法1: 用 file_get_contents 以get方式获取内容
     
 
      <?php
      // Method 1: plain HTTP GET through the http:// stream wrapper.
      $url  = 'http://www.domain.com/?para=123';

      // file_get_contents() hands back the response body, which is then
      // written straight to the output.
      $html = file_get_contents($url);
      print $html;
      ?>
     

         
     
 
      方法2：用 file_get_contents 函数,以post方式获取url
     
 
      <?php
      // Method 2: send an HTTP POST with file_get_contents() by attaching a
      // stream context that carries the method, the headers and the body.
      $url  = 'http://www.domain.com/test.php?id=123';
      $data = array('foo' => 'bar');

      // Encode the fields as application/x-www-form-urlencoded.
      $data = http_build_query($data);

      $opts = array(
          'http' => array(
              'method'  => 'POST',
              'header'  => "Content-type: application/x-www-form-urlencoded\r\n" .
                           "Content-Length: " . strlen($data) . "\r\n",
              'content' => $data,
          ),
      );
      $ctx = stream_context_create($opts);

      // The second argument is the boolean $use_include_path flag, so pass
      // false rather than an empty string.
      $html = file_get_contents($url, false, $ctx);
      if ($html === false) {
          // Log the failure instead of hiding it behind the @ operator.
          error_log('POST request to ' . $url . ' failed');
      }
      ?>
     

         
     
 
      如果需要再传递cookie数据,则把 
     
 
      'header' 
      => 
      "Content-type: application/x-www-form-urlencoded\r\n" 
       .  
     
 
                         
      "Content-Length: " 
       .  
      strlen 
      ( 
      $data 
      ) .  
      "\r\n" 
      , 
     
 
      修改为 
     
 
      'header' 
      => 
      "Content-type: application/x-www-form-urlencoded\r\n" 
       .  
     
 
                        
      "Content-Length: " 
       .  
      strlen 
      ( 
      $data 
      ) .  
      "\r\n" 
      . 
     
 
                        
      "Cookie: cookie1=c1; cookie2=c2\r\n" 
      , 
     
 
      即可 
     

         
     
 
      方法3: 用 fopen 打开url, 以get方式获取内容
     
 
      <?php
      // Method 3: open the URL as a read-only stream (HTTP GET), then print
      // the stream metadata followed by the response body.
      $url = 'http://www.domain.com/?para=123';

      $fp = fopen($url, 'r');
      if ($fp) {
          // Fetch header information. stream_get_meta_data() returns an
          // array, so it is rendered with print_r() below rather than
          // interpolated into a string.
          $header = stream_get_meta_data($fp);

          // Start from an empty string so the first .= is well defined.
          $result = '';
          while (!feof($fp)) {
              $result .= fgets($fp, 1024);
          }
          fclose($fp);

          echo "url header: " . print_r($header, true) . " <br>";
          echo "url body: $result";
      } else {
          echo "Unable to open $url";
      }
      ?>
     

         
     
 
      方法4: 用 fopen 打开url, 以post方式获取内容
     
 
      <?php
      // Method 4: send an HTTP POST (with a Cookie header) by opening the URL
      // with fopen() and a stream context, then print the response body.
      $data = array('foo2' => 'bar2', 'foo3' => 'bar3');
      $data = http_build_query($data);

      $opts = array(
          'http' => array(
              'method'  => 'POST',
              'header'  => "Content-type: application/x-www-form-urlencoded\r\nCookie:cook1=c3;cook2=c4\r\n" .
                           "Content-Length: " . strlen($data) . "\r\n",
              'content' => $data,
          ),
      );

      $context = stream_context_create($opts);

      // Despite its name, $html holds the stream handle; $w receives the body.
      $html = fopen('http://www.test.com/zzzz.php?id=i3&id2=i4', 'rb', false, $context);
      if ($html) {
          // Read until end of stream: a single fread($html, 1024) would cut
          // the response off after at most 1024 bytes.
          $w = stream_get_contents($html);
          fclose($html);
          echo $w;
      } else {
          echo 'Unable to open the URL';
      }
      ?>
     

         
     
 
      方法5：用 fsockopen 函数打开url，以get方式获取完整的数据，包括header和body
     
 
      <?php 
     
 
      /**
       * Fetch a URL with a hand-written HTTP GET request over a raw socket.
       *
       * Only plain TCP is used and the port defaults to 80, so https:// URLs
       * are not supported by this function.
       *
       * @param string       $url    Absolute URL to request (must contain a host).
       * @param string|false $cookie Value for the Cookie header, or false to send none.
       * @return string|false Raw response (status line, headers and body), or
       *                      false when the URL has no host or the connection fails.
       */
      function get_url($url, $cookie = false)
      {
          $parts = parse_url($url);
          if ($parts === false || !isset($parts['host'])) {
              return false;
          }

          // Array keys must be quoted strings: bare words such as $url[path]
          // are undefined constants. Missing components get safe defaults.
          $path  = isset($parts['path']) ? $parts['path'] : '/';
          $query = isset($parts['query']) ? $path . "?" . $parts['query'] : $path;
          $port  = isset($parts['port']) ? $parts['port'] : 80;

          // Debug output kept from the original implementation.
          echo "Query:" . $query;

          $fp = fsockopen($parts['host'], $port, $errno, $errstr, 30);
          if (!$fp) {
              return false;
          }

          $request  = "GET $query HTTP/1.1\r\n";
          $request .= "Host: {$parts['host']}\r\n";
          $request .= "Connection: Close\r\n";
          if ($cookie) {
              // Header lines end in CRLF, like the other request lines.
              $request .= "Cookie:   $cookie\r\n";
          }
          $request .= "\r\n";
          fwrite($fp, $request);

          // Start from an empty string so the first .= is well defined.
          $result = '';
          while (!feof($fp)) {
              $result .= fgets($fp, 1024);
          }
          fclose($fp);

          return $result;
      }
     
 
      /**
       * Fetch a URL through get_url() and return only the part of the raw
       * response that follows the first blank line (the header is dropped).
       *
       * @param string       $url    URL to request.
       * @param string|false $cookie Cookie header value passed on to get_url().
       * @return string|false Response body, or false when get_url() returned
       *                      nothing usable.
       */
      function GetUrlHTML($url, $cookie = false)
      {
          $response = get_url($url, $cookie);
          if (!$response) {
              return false;
          }

          // Portion of the response starting at the CRLF CRLF separator.
          $fromSeparator = stristr($response, "\r\n\r\n");

          // Skip the four separator bytes themselves.
          return substr($fromSeparator, 4);
      }
     
 
      ?> 
     

         
     
 
      方法6：用 fsockopen 函数打开url，以POST方式获取完整的数据，包括header和body
     
 
      <?php 
     
 
      /**
       * Send form data with a hand-written HTTP POST request over a raw socket.
       *
       * Only plain TCP is used and the port defaults to 80, so https:// URLs
       * are not supported by this function.
       *
       * @param string $URL      Absolute URL to post to (must contain a host).
       * @param array  $data     Form fields as key => value pairs.
       * @param string $cookie   Value for the Cookie header; omitted when empty.
       * @param string $referrer Value for the Referer header; "111" is used when empty.
       * @return string|false Raw response (status line, headers and body), or
       *                      false when the URL has no host or the connection fails.
       */
      function HTTP_Post($URL, $data, $cookie, $referrer = "")
      {
          // parsing the given URL
          $URL_Info = parse_url($URL);
          if ($URL_Info === false || !isset($URL_Info["host"])) {
              return false;
          }

          // Building referrer: fall back to the placeholder value when none is given
          if ($referrer == "") {
              $referrer = "111";
          }

          // making string from $data (keys and values are both URL-encoded)
          $values = array();
          foreach ($data as $key => $value) {
              $values[] = urlencode($key) . "=" . urlencode($value);
          }
          $data_string = implode("&", $values);

          // Find out which port is needed - if not given use standard (=80)
          if (!isset($URL_Info["port"])) {
              $URL_Info["port"] = 80;
          }

          // Request target: path (default "/") plus the query string, if any
          $target = isset($URL_Info["path"]) ? $URL_Info["path"] : "/";
          if (isset($URL_Info["query"])) {
              $target .= "?" . $URL_Info["query"];
          }

          // building POST-request; every header line ends in CRLF
          $request  = "POST " . $target . " HTTP/1.1\r\n";
          $request .= "Host: " . $URL_Info["host"] . "\r\n";
          $request .= "Referer: $referrer\r\n";
          $request .= "Content-type: application/x-www-form-urlencoded\r\n";
          $request .= "Content-length: " . strlen($data_string) . "\r\n";
          $request .= "Connection: close\r\n";
          if ($cookie != "") {
              $request .= "Cookie:   $cookie\r\n";
          }
          $request .= "\r\n";
          // The body is sent exactly as counted in Content-length (no trailing newline)
          $request .= $data_string;

          $fp = fsockopen($URL_Info["host"], $URL_Info["port"], $errno, $errstr);
          if (!$fp) {
              return false;
          }
          fputs($fp, $request);

          // Start from an empty string so the first .= is well defined
          $result = '';
          while (!feof($fp)) {
              $result .= fgets($fp, 1024);
          }
          fclose($fp);

          return $result;
      }
     

         
     
 
      ?> 
     

         
     
 
      方法7:使用curl库，使用curl库之前，可能需要查看一下php.ini是否已经打开了curl扩展 
     
 
      <?php
      // Method 7: fetch a page with the cURL extension and print the body.
      $timeout = 5;

      // Passing the URL to curl_init() sets CURLOPT_URL on the new handle.
      $ch = curl_init('http://www.domain.com/');

      // Give up if the connection is not established within $timeout seconds.
      curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
      // Make curl_exec() return the response instead of printing it.
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

      $file_contents = curl_exec($ch);
      curl_close($ch);

      print $file_contents;
      ?>
     

php获得网页源代码抓取网页内容的几种方法

作者:admin 时间:2013-5-25 15:38:36 浏览: 21319

这里收集了3种利用php获得网页源代码抓取网页内容的方法，我们可以根据实际需要选用。

1、使用file_get_contents获得网页源代码

这个方法最常用，只需要两行代码即可，非常简单方便。

参考代码：

<?php
// Download the page source in one call and write it to the output.
$fh = file_get_contents('http://www.webkaka.com/');
print $fh;
?>

2、使用fopen获得网页源代码

这个方法用的人也不少，不过代码有点多。

参考代码：

<?php
// Open the page as a read-only stream and echo it line by line.
$fh = fopen('http://www.webkaka.com/', 'r');
if ($fh) {
    // fgets() returns false at end of stream or on a read error, which
    // ends the loop in both cases.
    while (($line = fgets($fh)) !== false) {
        echo $line;
    }
    // Release the stream once the page has been printed.
    fclose($fh);
}
?>

3、使用curl获得网页源代码

使用curl获得网页源代码的方法，通常适用于要求更高的场景，例如需要在抓取网页内容的同时得到网页header信息，或者需要设置ENCODING编码、USERAGENT等等。

参考代码一：

<?php
// Create a new cURL handle.
$ch = curl_init();
// Set the URL and the related options.
curl_setopt($ch, CURLOPT_URL, "http://www.webkaka.com/");
curl_setopt($ch, CURLOPT_HEADER, false);
// Return the page from curl_exec() instead of printing it directly.
// Without this option curl_exec() outputs the page itself and returns true,
// so the echo below would append a stray "1".
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Fetch the URL and pass the result on to the browser.
$data = curl_exec($ch);
if ($data === false) {
    echo 'cURL error: ' . curl_error($ch);
} else {
    echo $data;
}
// Close the cURL handle and free its system resources.
curl_close($ch);
?>

参考代码二：

<?php
// Fetch a page with cURL using a custom User-Agent, automatic content
// decoding and redirect following, then print the body.
$szUrl = "http://www.webkaka.com/";
$UserAgent = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; .NET CLR 3.0.04506; .NET CLR 3.5.21022; .NET CLR 1.0.3705; .NET CLR 1.1.4322)';
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, $szUrl);
curl_setopt($curl, CURLOPT_HEADER, 0); // 0 = do not include the header in the output, 1 = include it
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
// NOTE(review): the next two options switch off TLS certificate and host
// verification. They are kept as in the original example, but should not be
// used against https:// URLs in production code.
curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);
curl_setopt($curl, CURLOPT_ENCODING, '');
curl_setopt($curl, CURLOPT_USERAGENT, $UserAgent);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
$data = curl_exec($curl);
if ($data === false) {
    // curl_errno() is 0 on success; on failure report the number and message.
    echo 'cURL error ' . curl_errno($curl) . ': ' . curl_error($curl);
} else {
    echo $data;
}
// Release the handle before the script terminates.
curl_close($curl);
exit();
?>