[转] 贴Snoopy.class.php代码学习参考


Snoopy.class.php 是一个关于HTTP协议访问操作的类库,主要是使用在 MagpieRSS 中用于远程文件的抓取,我原来转载的一篇文章大致有简单的介绍这个东西,今天无聊,把代码贴出来,大家参考学习。

 

<?php

/* ************************************************

Snoopy - the PHP net client
Author: Monte Ohrt <monte@ispi.net>
Copyright (c): 1999-2000 ispi, all rights reserved
Version: 1.01

 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

You may contact the author of Snoopy by e-mail at:
monte@ispi.net

Or, write to:
Monte Ohrt
CTO, ispi
237 S. 70th suite 220
Lincoln, NE 68510

The latest version of Snoopy can be obtained from:
http://snoopy.sourceforge.net/

************************************************
*/

class  Snoopy
{
    
/* *** Public variables *** */
    
    
/*  user definable vars  */

    
var $host               = "www.php.net";        // host name we are connecting to
var $port               = 80;                   // port we are connecting to
var $proxy_host         = "";                   // proxy host to use
var $proxy_port         = "";                   // proxy port to use
var $proxy_user         = "";                   // proxy user to use
var $proxy_pass         = "";                   // proxy password to use

var $agent              = "Snoopy v1.2.3";      // agent we masquerade as
var $referer            = "";                   // referer info to pass
var $cookies            = array();              // array of cookies to pass
                                                // $cookies["username"]="joe";
var $rawheaders         = array();              // array of raw headers to send
                                                // $rawheaders["Content-type"]="text/html";

var $maxredirs          = 5;                    // http redirection depth maximum. 0 = disallow
var $lastredirectaddr   = "";                   // contains address of last redirected address
var $offsiteok          = true;                 // allows redirection off-site
var $maxframes          = 0;                    // frame content depth maximum. 0 = disallow
var $expandlinks        = true;                 // expand links to fully qualified URLs.
                                                // this only applies to fetchlinks()
                                                // submitlinks(), and submittext()
var $passcookies        = true;                 // pass set cookies back through redirects
                                                // NOTE: this currently does not respect
                                                // dates, domains or paths.

var $user               = "";                   // user for http authentication
var $pass               = "";                   // password for http authentication

// http accept types
var $accept             = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

var $results            = "";                   // where the content is put

var $error              = "";                   // error messages sent here
var $response_code      = "";                   // response code returned from server
var $headers            = array();              // headers returned from server sent here
var $maxlength          = 500000;               // max return data length (body)
var $read_timeout       = 0;                    // timeout on read operations, in seconds
                                                // supported only since PHP 4 Beta 4
                                                // set to 0 to disallow timeouts
var $timed_out          = false;                // if a read operation timed out
var $status             = 0;                    // http request status

var $temp_dir           = "/tmp";               // temporary directory that the webserver
                                                // has permission to write to.
                                                // under Windows, this should be C:\temp

var $curl_path          = "/usr/local/bin/curl";
                                                // Snoopy will use cURL for fetching
                                                // SSL content if a full system path to
                                                // the cURL binary is supplied here.
                                                // set to false if you do not have
                                                // cURL installed. See http://curl.haxx.se
                                                // for details on installing cURL.
                                                // Snoopy does *not* use the cURL
                                                // library functions built into php,
                                                // as these functions are not stable
                                                // as of this Snoopy release.

/* *** Private variables *** */

var $_maxlinelen        = 4096;                 // max line length (headers)

var $_httpmethod        = "GET";                // default http request method
var $_httpversion       = "HTTP/1.0";           // default http request version
var $_submit_method     = "POST";               // default submit method
var $_submit_type       = "application/x-www-form-urlencoded";  // default submit type
var $_mime_boundary     = "";                   // MIME boundary for multipart/form-data submit type
var $_redirectaddr      = false;                // will be set if page fetched is a redirect
var $_redirectdepth     = 0;                    // increments on an http redirect
var $_frameurls         = array();              // frame src urls
var $_framedepth        = 0;                    // increments on frame depth

var $_isproxy           = false;                // set if using a proxy server
var $_fp_timeout        = 30;                   // timeout for socket connection

/* ======================================================================*
    Function:    fetch
    Purpose:    fetch the contents of a web page
                (and possibly other protocols in the
                future like ftp, nntp, gopher, etc.)
    Input:        $URI    the location of the page to fetch
    Output:        $this->results    the output text from the fetch
*======================================================================
*/

    
/**
 * Fetch a page over http or https and leave the outcome on the object.
 *
 * Credentials embedded in the URI overwrite $this->user / $this->pass.
 * Once the request has been issued, a recorded redirect is followed while
 * $this->_redirectdepth is below $this->maxredirs, and collected frame URLs
 * are fetched while $this->_framedepth is below $this->maxframes.
 *
 * @param string $URI the location of the page to fetch
 * @return bool false when the scheme is not http/https (message stored in
 *              $this->error), when $this->_connect() fails, or when https
 *              is requested without an executable $this->curl_path;
 *              true otherwise
 */
function fetch($URI)
{
    $URI_PARTS = parse_url($URI);
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();           // parse_url() yields false on malformed URIs

    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];
    if (empty($URI_PARTS["query"]))
        $URI_PARTS["query"] = '';
    if (empty($URI_PARTS["path"]))
        $URI_PARTS["path"] = '';
    if (empty($URI_PARTS["scheme"]))
        $URI_PARTS["scheme"] = '';      // ends up in the "Invalid protocol" branch

    // request target used when no proxy is involved: path plus optional query
    $path = $URI_PARTS["path"] . ($URI_PARTS["query"] ? "?" . $URI_PARTS["query"] : "");

    switch (strtolower($URI_PARTS["scheme"]))
    {
        case "http":
            $this->host = $URI_PARTS["host"];
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            if (!$this->_connect($fp))
                return false;

            // _isproxy is read only after _connect(), as in the original flow;
            // through a proxy the entire URI is sent, otherwise only the path
            if ($this->_isproxy)
                $this->_httprequest($URI, $fp, $URI, $this->_httpmethod);
            else
                $this->_httprequest($path, $fp, $URI, $this->_httpmethod);

            $this->_disconnect($fp);
            break;

        case "https":
            if (!$this->curl_path)
                return false;
            if (function_exists("is_executable") && !is_executable($this->curl_path))
                return false;

            $this->host = $URI_PARTS["host"];
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            if ($this->_isproxy)
                $this->_httpsrequest($URI, $URI, $this->_httpmethod);
            else
                $this->_httpsrequest($path, $URI, $this->_httpmethod);
            break;

        default:
            // not a valid protocol
            $this->error = 'Invalid protocol "' . $URI_PARTS["scheme"] . '"';
            return false;
    }

    // url was redirected: follow it unless the max depth has been reached
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // On-site means the same host over http or https. The host must be
        // followed by a port, path, query, fragment or the end of the string
        // so that "http://host.evil.tld" does not pass as "http://host".
        $onsite = preg_match(
            '~^https?://' . preg_quote($this->host, '~') . '(?:[:/?#]|$)~i',
            $this->_redirectaddr
        );

        if ($onsite || $this->offsiteok)
        {
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }
    }

    // fetch frame sources collected by the request, up to the frame depth limit
    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        // foreach replaces list()/each(); each() no longer exists in PHP 8
        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;

            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}

/* ======================================================================*
    Function:    submit
    Purpose:    submit an http form
    Input:        $URI    the location to post the data
                $formvars    the formvars to use.
                    format: $formvars["var"] = "val";
                $formfiles  an array of files to submit
                    format: $formfiles["var"] = "/dir/filename.ext";
    Output:        $this->results    the text output from the post
*======================================================================
*/

    
/**
 * Submit an http form and leave the outcome on the object.
 *
 * The request body is built by $this->_prepare_post_body(). Credentials
 * embedded in the URI overwrite $this->user / $this->pass. A recorded
 * redirect is followed while $this->_redirectdepth is below
 * $this->maxredirs: with fetch() when the redirect address contains a "?"
 * past its first character, otherwise by submitting the same form data
 * again. Collected frame URLs are fetched up to $this->maxframes.
 *
 * @param string $URI       the location to post the data
 * @param mixed  $formvars  form variables, format: $formvars["var"] = "val";
 * @param mixed  $formfiles files to submit,
 *                          format: $formfiles["var"] = "/dir/filename.ext";
 * @return bool false when the scheme is not http/https (message stored in
 *              $this->error), when $this->_connect() fails, or when https
 *              is requested without an executable $this->curl_path;
 *              true otherwise
 */
function submit($URI, $formvars = "", $formfiles = "")
{
    $postdata = $this->_prepare_post_body($formvars, $formfiles);

    $URI_PARTS = parse_url($URI);
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();           // parse_url() yields false on malformed URIs

    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];
    if (empty($URI_PARTS["query"]))
        $URI_PARTS["query"] = '';
    if (empty($URI_PARTS["path"]))
        $URI_PARTS["path"] = '';
    if (empty($URI_PARTS["scheme"]))
        $URI_PARTS["scheme"] = '';      // ends up in the "Invalid protocol" branch

    // request target used when no proxy is involved: path plus optional query
    $path = $URI_PARTS["path"] . ($URI_PARTS["query"] ? "?" . $URI_PARTS["query"] : "");

    switch (strtolower($URI_PARTS["scheme"]))
    {
        case "http":
            $this->host = $URI_PARTS["host"];
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            if (!$this->_connect($fp))
                return false;

            // _isproxy is read only after _connect(), as in the original flow;
            // through a proxy the entire URI is sent, otherwise only the path
            if ($this->_isproxy)
                $this->_httprequest($URI, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            else
                $this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);

            $this->_disconnect($fp);
            break;

        case "https":
            if (!$this->curl_path)
                return false;
            if (function_exists("is_executable") && !is_executable($this->curl_path))
                return false;

            $this->host = $URI_PARTS["host"];
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            if ($this->_isproxy)
                $this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            else
                $this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            break;

        default:
            // not a valid protocol
            $this->error = 'Invalid protocol "' . $URI_PARTS["scheme"] . '"';
            return false;
    }

    // url was redirected: follow it unless the max depth has been reached
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // a redirect address that does not start with the request's own
        // scheme is expanded against scheme://host of the submitted URI
        if (!preg_match("|^" . $URI_PARTS["scheme"] . "://|", $this->_redirectaddr))
            $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $URI_PARTS["scheme"] . "://" . $URI_PARTS["host"]);

        // On-site means the same host over http or https. The host must be
        // followed by a port, path, query, fragment or the end of the string
        // so that "http://host.evil.tld" does not pass as "http://host".
        $onsite = preg_match(
            '~^https?://' . preg_quote($this->host, '~') . '(?:[:/?#]|$)~i',
            $this->_redirectaddr
        );

        if ($onsite || $this->offsiteok)
        {
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;

            if (strpos($this->_redirectaddr, "?") > 0)
                $this->fetch($this->_redirectaddr);     // the redirect has changed the request method from post to get
            else
                $this->submit($this->_redirectaddr, $formvars, $formfiles);
        }
    }

    // fetch frame sources collected by the request, up to the frame depth limit
    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        // foreach replaces list()/each(); each() no longer exists in PHP 8
        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;

            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}

/* ======================================================================*
    Function:    fetchlinks
    Purpose:    fetch the links from a web page
    Input:        $URI    where you are fetching from
    Output:        $this->results    an array of the URLs
*======================================================================
*/

    
/**
 * Fetch a page and replace $this->results with the links found in it.
 *
 * When a redirect was followed, $this->lastredirectaddr is used as the base
 * for link expansion instead of the URI passed in. If $this->results is an
 * array, every element is stripped and expanded on its own, the same way
 * submitlinks() treats it.
 *
 * @param string $URI where you are fetching from
 * @return bool the result of $this->fetch($URI)
 */
function fetchlinks($URI)
{
    if (!$this->fetch($URI))
        return false;

    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (is_array($this->results))
    {
        $total = count($this->results);
        for ($x = 0; $x < $total; $x++)
        {
            $this->results[$x] = $this->_striplinks($this->results[$x]);

            // expand per element so _expandlinks() receives one link list at a
            // time, exactly as in the non-array branch below
            if ($this->expandlinks)
                $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
        }
    }
    else
    {
        $this->results = $this->_striplinks($this->results);

        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $URI);
    }

    return true;
}

/* ======================================================================*
    Function:    fetchform
    Purpose:    fetch the form elements from a web page
    Input:        $URI    where you are fetching from
    Output:        $this->results    the resulting html form
*======================================================================
*/

    
/**
 * Fetch a page and reduce $this->results to its form elements.
 *
 * If $this->results is an array, every element is passed through
 * $this->_stripform() separately.
 *
 * @param string $URI where you are fetching from
 * @return bool the result of $this->fetch($URI)
 */
function fetchform($URI)
{
    if (!$this->fetch($URI))
        return false;

    if (is_array($this->results))
    {
        $total = count($this->results);
        for ($x = 0; $x < $total; $x++)
            $this->results[$x] = $this->_stripform($this->results[$x]);
    }
    else
        $this->results = $this->_stripform($this->results);

    return true;
}
    
    
/* ======================================================================*
    Function:    fetchtext
    Purpose:    fetch the text from a web page, stripping the links
    Input:        $URI    where you are fetching from
    Output:        $this->results    the text from the web page
*======================================================================
*/

    
/**
 * Fetch a page and reduce $this->results to its text.
 *
 * If $this->results is an array, every element is passed through
 * $this->_striptext() separately.
 *
 * @param string $URI where you are fetching from
 * @return bool the result of $this->fetch($URI)
 */
function fetchtext($URI)
{
    if (!$this->fetch($URI))
        return false;

    if (is_array($this->results))
    {
        $total = count($this->results);
        for ($x = 0; $x < $total; $x++)
            $this->results[$x] = $this->_striptext($this->results[$x]);
    }
    else
        $this->results = $this->_striptext($this->results);

    return true;
}

/* ======================================================================*
    Function:    submitlinks
    Purpose:    grab links from a form submission
    Input:        $URI    where you are submitting from
    Output:        $this->results    an array of the links from the post
*======================================================================
*/

    
/**
 * Submit a form and replace $this->results with the links in the response.
 *
 * When a redirect was followed, $this->lastredirectaddr is used as the base
 * for link expansion instead of the URI passed in. Links are expanded only
 * when $this->expandlinks is set.
 *
 * @param string $URI       where you are submitting from
 * @param mixed  $formvars  form variables, handed to submit() unchanged
 * @param mixed  $formfiles files to submit, handed to submit() unchanged
 * @return bool the result of $this->submit()
 */
function submitlinks($URI, $formvars = "", $formfiles = "")
{
    if (!$this->submit($URI, $formvars, $formfiles))
        return false;

    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (is_array($this->results))
    {
        $total = count($this->results);
        for ($x = 0; $x < $total; $x++)
        {
            $this->results[$x] = $this->_striplinks($this->results[$x]);

            if ($this->expandlinks)
                $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
        }
    }
    else
    {
        $this->results = $this->_striplinks($this->results);

        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $URI);
    }

    return true;
}

/* ======================================================================*
    Function:    submittext
    Purpose:    grab text from a form submission
    Input:        $URI    where you are submitting from
    Output:        $this->results    the text from the web page
*======================================================================
*/

    
/**
 * Submit a form and reduce $this->results to the text of the response.
 *
 * When a redirect was followed, $this->lastredirectaddr is used as the base
 * for link expansion instead of the URI passed in. The stripped text is run
 * through $this->_expandlinks() only when $this->expandlinks is set.
 *
 * @param string $URI       where you are submitting from
 * @param mixed  $formvars  form variables, handed to submit() unchanged
 * @param mixed  $formfiles files to submit, handed to submit() unchanged
 * @return bool the result of $this->submit()
 */
function submittext($URI, $formvars = "", $formfiles = "")
{
    if (!$this->submit($URI, $formvars, $formfiles))
        return false;

    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (is_array($this->results))
    {
        $total = count($this->results);
        for ($x = 0; $x < $total; $x++)
        {
            $this->results[$x] = $this->_striptext($this->results[$x]);

            if ($this->expandlinks)
                $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
        }
    }
    else
    {
        $this->results = $this->_striptext($this->results);

        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $URI);
    }

    return true;
}

    

/* ======================================================================*
    Function:    set_submit_multipart
    Purpose:    Set the form submission content type to
                multipart/form-data
*======================================================================
*/
    
/**
 * Switch the form submission content type to multipart/form-data.
 *
 * Only $this->_submit_type is changed; submit() passes that value on with
 * each request it issues.
 */
function set_submit_multipart()
{
    // no surrounding blanks: the value must be the exact MIME type
    $this->_submit_type = "multipart/form-data";
}

    
/* ======================================================================*
    Function:    set_submit_normal
    Purpose:    Set the form submission content type to
                application/x-www-form-urlencoded
*======================================================================
*/
    
/**
 * Switch the form submission content type back to
 * application/x-www-form-urlencoded.
 *
 * Only $this->_submit_type is changed; submit() passes that value on with
 * each request it issues.
 */
function set_submit_normal()
{
    // no surrounding blanks: the value must be the exact MIME type
    $this->_submit_type = "application/x-www-form-urlencoded";
}

    
    

/* ======================================================================*
    Private functions
*======================================================================
*/
    
    
/* ======================================================================*
    Function:    _striplinks
    Purpose:    strip the hyperlinks from an html document
    Input:        $document    document to strip.
    Output:        $match        an array of the links
*======================================================================
*/

    
/**
 * Extract the href targets of the anchor tags in an html document.
 *
 * Quoted href values are collected first, followed by unquoted ones, so the
 * returned order is not strictly document order.
 *
 * @param string $document document to strip
 * @return array the non-empty link targets; an empty array when none match
 */
function _striplinks($document)
{
    // Extended-mode pattern: blanks and "#" comments inside it are ignored.
    // The escapes (\s, \1) had been stripped from the pasted listing.
    $pattern = '~<\s*a\s.*?href\s*=\s*          # find <a href=
                (["\'])?                        # optional single or double quote
                (?(1) (.*?)\1 | ([^\s>]+))      # quoted: up to the matching quote,
                                                # unquoted: up to the next space or >
                ~isx';

    preg_match_all($pattern, $document, $links);

    // start from an empty list so a document without links yields array()
    $match = array();

    // catenate the non-empty matches from both arms of the conditional
    foreach ($links[2] as $val)
    {
        if (!empty($val))
            $match[] = $val;
    }

    foreach ($links[3] as $val)
    {
        if (!empty($val))
            $match[] = $val;
    }

    return $match;
}

/* ======================================================================*
    Function:    _stripform
    Purpose:    strip the form elements from an html document
    Input:        $document    document to strip.
    Output:        $match        the matched form elements joined into one string
*======================================================================
*/

    
/**
 * Reduce an html document to its form-related tags.
 *
 * Matches FORM, INPUT, SELECT, TEXTAREA and OPTION tags (opening or
 * closing, case-insensitive); for OPTION the text up to the next
 * option/select tag is kept as well.
 *
 * @param string $document document to strip
 * @return string the matched elements joined with CRLF; "" when none match
 */
function _stripform($document)
{
    // NOTE(review): the pasted listing showed "[ ]" and a one-blank join
    // separator where escape sequences had been flattened; \r\n is restored
    // here on that assumption. Confirm against an unmangled copy.
    preg_match_all("'</?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=</?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi", $document, $elements);

    // catenate the matches, one element per line
    $match = implode("\r\n", $elements[0]);

    // return the form elements
    return $match;
}

    
    
/*======================================================================*\
    Function:   _striptext
    Purpose:    strip the text from an html document
    Input:      $document   document to strip.
    Output:     $text       the resulting text
\*======================================================================*/

    function _striptext($document)
    {
        // Entities are listed one by one; $search[n] is replaced by
        // $replace[n], and the pairs are applied in order, each one on the
        // result of the previous one.
        $search = array(
            "'<script[^>]*?>.*?</script>'si",   // strip out javascript
            "'<[\/\!]*?[^<>]*?>'si",            // strip out html tags
            "'([\r\n])[\s]+'",                  // strip out white space
            "'&(quot|#34|#034|#x22);'i",        // replace html entities
            "'&(amp|#38|#038|#x26);'i",         // (incl. hexadecimal forms)
            "'&(lt|#60|#060|#x3c);'i",
            "'&(gt|#62|#062|#x3e);'i",
            "'&(nbsp|#160|#xa0);'i",
            "'&(iexcl|#161);'i",
            "'&(cent|#162);'i",
            "'&(pound|#163);'i",
            "'&(copy|#169);'i",
            "'&(reg|#174);'i",
            "'&(deg|#176);'i",
            "'&(#39|#039|#x27);'",
            "'&(euro|#8364);'i",                // europe
            "'&a(uml|UML);'",                   // german
            "'&o(uml|UML);'",
            "'&u(uml|UML);'",
            "'&A(uml|UML);'",
            "'&O(uml|UML);'",
            "'&U(uml|UML);'",
            "'&szlig;'i",
        );

        // Replacements are single bytes. The umlauts and sharp s are given
        // as chr() codes (ISO-8859-1 positions) so they do not depend on the
        // encoding this source file is saved in, matching the chr() style
        // already used for the other symbols.
        $replace = array(
            "",
            "",
            "\\1",          // keep the first line-break character only
            "\"",
            "&",
            "<",
            ">",
            " ",
            chr(161),
            chr(162),
            chr(163),
            chr(169),
            chr(174),
            chr(176),
            chr(39),
            chr(128),
            chr(228),       // a umlaut
            chr(246),       // o umlaut
            chr(252),       // u umlaut
            chr(196),       // A umlaut
            chr(214),       // O umlaut
            chr(220),       // U umlaut
            chr(223),       // sharp s
        );

        $text = preg_replace($search, $replace, $document);

        return $text;
    }

/*======================================================================*\
    Function:   _expandlinks
    Purpose:    expand each link into a fully qualified URL
    Input:      $links          the links to qualify (string or array,
                                as accepted by preg_replace)
                $URI            the full URI to get the base from
    Output:     $expandedLinks  the expanded links
\*======================================================================*/

    function _expandlinks($links,$URI)
    {
        // everything before the query string
        preg_match("/^[^\?]+/", $URI, $match);
        $base = isset($match[0]) ? $match[0] : "";

        // Drop a trailing "/name.ext" so $base is the containing directory.
        // The look-behind keeps the pattern from matching the second slash
        // of "scheme://", which would otherwise strip "//host.tld" from a
        // URI that has no path.
        $base = preg_replace("|(?<!/)/[^\/\.]+\.[^\/\.]+$|", "", $base);
        $base = preg_replace("|/$|", "", $base);

        $match_part = parse_url($base);
        if (!is_array($match_part))
            $match_part = array();
        $scheme = isset($match_part["scheme"]) ? $match_part["scheme"] : "";
        $host   = isset($match_part["host"])   ? $match_part["host"]   : "";
        $match_root = $scheme."://".$host;

        // Applied in order to every link:
        //  1. strip a leading "http://<our host>" so the link becomes root-relative
        //  2. root-relative link  -> prefix scheme://host
        //  3. relative link       -> prefix the base directory
        //     (absolute http/https URLs and mailto: links are left alone)
        //  4. collapse "/./"
        //  5. collapse "/dir/../"
        $search = array(
            "|^http://".preg_quote($this->host, "|")."|i",
            "|^(\/)|i",
            "|^(?!https?://)(?!mailto:)|i",
            "|/\./|",
            "|/[^\/]+/\.\./|"
        );

        $replace = array(
            "",
            $match_root."/",
            $base."/",
            "/",
            "/"
        );

        $expandedLinks = preg_replace($search, $replace, $links);

        return $expandedLinks;
    }

/*======================================================================*\
    Function:   _httprequest
    Purpose:    go get the http data from the server
    Input:      $url            the url to fetch
                $fp             the current open file pointer
                $URI            the full URI
                $http_method    method placed on the request line
                $content_type   Content-type header value, if any
                $body           body contents to send if any (POST)
    Output:     true once the response has been read and stored;
                false (with $this->status set to -100) when a read
                timeout is detected
\*======================================================================*/

    function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
    {
        $cookie_headers = '';
        if ($this->passcookies && $this->_redirectaddr)
            $this->setcookies();

        $URI_PARTS = parse_url($URI);
        if (empty($url))
            $url = "/";

        // ---- build the request head; every header line ends with CRLF ----
        $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
        if (!empty($this->agent))
            $headers .= "User-Agent: ".$this->agent."\r\n";
        // a Host entry in rawheaders takes precedence over the generated one
        if (!empty($this->host) && !isset($this->rawheaders['Host'])) {
            $headers .= "Host: ".$this->host;
            if (!empty($this->port))
                $headers .= ":".$this->port;
            $headers .= "\r\n";
        }
        if (!empty($this->accept))
            $headers .= "Accept: ".$this->accept."\r\n";
        if (!empty($this->referer))
            $headers .= "Referer: ".$this->referer."\r\n";
        if (!empty($this->cookies))
        {
            if (!is_array($this->cookies))
                $this->cookies = (array)$this->cookies;

            if (count($this->cookies) > 0) {
                $cookie_headers .= 'Cookie: ';
                foreach ($this->cookies as $cookieKey => $cookieVal) {
                    $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
                }
                // drop the trailing "; " left by the loop
                $headers .= substr($cookie_headers, 0, -2)."\r\n";
            }
        }
        if (!empty($this->rawheaders))
        {
            if (!is_array($this->rawheaders))
                $this->rawheaders = (array)$this->rawheaders;
            // foreach instead of each(): each() no longer exists in PHP 8
            foreach ($this->rawheaders as $headerKey => $headerVal)
                $headers .= $headerKey.": ".$headerVal."\r\n";
        }
        if (!empty($content_type)) {
            $headers .= "Content-type: $content_type";
            if ($content_type == "multipart/form-data")
                $headers .= "; boundary=".$this->_mime_boundary;
            $headers .= "\r\n";
        }
        if (!empty($body))
            $headers .= "Content-length: ".strlen($body)."\r\n";
        if (!empty($this->user) || !empty($this->pass))
            $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

        // add proxy auth headers
        if (!empty($this->proxy_user))
            $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

        // blank line terminates the header section
        $headers .= "\r\n";

        // set the read timeout if needed
        if ($this->read_timeout > 0)
            socket_set_timeout($fp, $this->read_timeout);
        $this->timed_out = false;

        fwrite($fp, $headers.$body, strlen($headers.$body));

        $this->_redirectaddr = false;
        // start from an empty list so later code can always count()/iterate it
        $this->headers = array();

        // ---- read the response headers, one line at a time ----
        while ($currentHeader = fgets($fp, $this->_maxlinelen))
        {
            if ($this->read_timeout > 0 && $this->_check_timeout($fp))
            {
                $this->status = -100;
                return false;
            }

            // a bare CRLF marks the end of the headers
            if ($currentHeader == "\r\n")
                break;

            // if a header begins with Location: or URI:, set the redirect.
            // The value is captured in the same match, so a header without
            // a usable value is simply ignored.
            if (preg_match("/^(Location:|URI:)[ \t]*(.*)/i", chop($currentHeader), $matches)
                && $matches[2] != "")
            {
                // look for :// in the target to see if hostname is included
                if (strpos($matches[2], "://") === false)
                {
                    // no host in the path, so prepend
                    $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                    // eliminate double slash
                    if (!preg_match("|^/|", $matches[2]))
                        $this->_redirectaddr .= "/".$matches[2];
                    else
                        $this->_redirectaddr .= $matches[2];
                }
                else
                    $this->_redirectaddr = $matches[2];
            }

            if (preg_match("|^HTTP/|", $currentHeader))
            {
                // second whitespace-delimited token of the status line
                if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status))
                {
                    $this->status = $status[1];
                }
                $this->response_code = $currentHeader;
            }

            $this->headers[] = $currentHeader;
        }

        // ---- read the body until the stream yields no more data ----
        $results = '';
        while (true) {
            $_data = fread($fp, $this->maxlength);
            if ($_data === false || strlen($_data) == 0) {
                break;
            }
            $results .= $_data;
        }

        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status = -100;
            return false;
        }

        // check if there is a redirect meta tag
        if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match))
        {
            $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
        }

        // have we hit our frame depth and is there frame src to fetch?
        if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match))
        {
            $this->results[] = $results;
            for ($x = 0; $x < count($match[1]); $x++)
                $this->_frameurls[] = $this->_expandlinks($match[1][$x], $URI_PARTS["scheme"]."://".$this->host);
        }
        // have we already fetched framed content?
        elseif (is_array($this->results))
            $this->results[] = $results;
        // no framed content
        else
            $this->results = $results;

        return true;
    }

/*======================================================================*\
    Function:   _httpsrequest
    Purpose:    go get the https data from the server using curl
    Input:      $url            the url to fetch
                $URI            the full URI
                $http_method    request method (not passed to curl here)
                $content_type   Content-type header value, if any
                $body           body contents to send if any (POST)
    Output:     true on success; false (with $this->error set) when the
                header temp file cannot be created or curl exits non-zero
\*======================================================================*/

    function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
    {
        if ($this->passcookies && $this->_redirectaddr)
            $this->setcookies();

        $headers = array();

        $URI_PARTS = parse_url($URI);
        if (empty($url))
            $url = "/";

        // A request line ("GET ... HTTP/x") is not needed: curl builds it.
        if (!empty($this->agent))
            $headers[] = "User-Agent: ".$this->agent;
        if (!empty($this->host)) {
            if (!empty($this->port))
                $headers[] = "Host: ".$this->host.":".$this->port;
            else
                $headers[] = "Host: ".$this->host;
        }
        if (!empty($this->accept))
            $headers[] = "Accept: ".$this->accept;
        if (!empty($this->referer))
            $headers[] = "Referer: ".$this->referer;
        if (!empty($this->cookies))
        {
            if (!is_array($this->cookies))
                $this->cookies = (array)$this->cookies;

            if (count($this->cookies) > 0) {
                $cookie_str = 'Cookie: ';
                foreach ($this->cookies as $cookieKey => $cookieVal) {
                    $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
                }
                // drop the trailing "; " left by the loop
                $headers[] = substr($cookie_str, 0, -2);
            }
        }
        if (!empty($this->rawheaders))
        {
            if (!is_array($this->rawheaders))
                $this->rawheaders = (array)$this->rawheaders;
            // foreach instead of each(): each() no longer exists in PHP 8
            foreach ($this->rawheaders as $headerKey => $headerVal)
                $headers[] = $headerKey.": ".$headerVal;
        }
        if (!empty($content_type)) {
            if ($content_type == "multipart/form-data")
                $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
            else
                $headers[] = "Content-type: $content_type";
        }
        if (!empty($body))
            $headers[] = "Content-length: ".strlen($body);
        if (!empty($this->user) || !empty($this->pass))
            $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

        // ---- build the curl command line ----
        // Every value that can carry caller/remote data (headers, body, URI)
        // goes through escapeshellarg() so it cannot break out of its
        // argument and run shell commands.
        // NOTE(review): on Windows escapeshellarg() replaces % and " with
        // spaces, which alters url-encoded bodies - confirm if Windows
        // hosts are supported.
        $cmdline_params = "";
        foreach ($headers as $header)
            $cmdline_params .= " -H ".escapeshellarg($header);

        if (!empty($body))
            $cmdline_params .= " -d ".escapeshellarg($body);

        if ($this->read_timeout > 0)
            $cmdline_params .= " -m ".escapeshellarg((string)$this->read_timeout);

        // NOTE(review): assumes the class may declare a temp_dir property
        // (not visible in this part of the file); when it is unset or empty
        // the system temporary directory is used instead.
        if (!empty($this->temp_dir))
            $temp_dir = $this->temp_dir;
        else
            $temp_dir = function_exists('sys_get_temp_dir') ? sys_get_temp_dir() : "";

        // curl dumps the response headers into this file (-D)
        $headerfile = tempnam($temp_dir, "sno");
        if ($headerfile === false)
        {
            $this->error = "Error: could not create a temporary file for the cURL response headers.";
            return false;
        }

        // curl_path is configuration, not request data, so it is used as-is
        exec($this->curl_path." -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI), $results, $return);

        if ($return)
        {
            // do not leave the temp file behind on failure
            @unlink($headerfile);
            $this->error = "Error: cURL could not retrieve the document, error $return.";
            return false;
        }

        // exec() hands back the output as an array of lines
        $results = implode("\r\n", $results);

        $result_headers = file($headerfile);
        if ($result_headers === false)
            $result_headers = array();
        // the file has been read; remove it before any further processing
        @unlink($headerfile);

        $this->_redirectaddr = false;
        // start from an empty list so later code can always count()/iterate it
        $this->headers = array();

        foreach ($result_headers as $currentHeader)
        {
            // if a header begins with Location: or URI:, set the redirect.
            // Same (case-insensitive) rule as _httprequest; the value is
            // captured in the same match so it is never undefined.
            if (preg_match("/^(Location:|URI:)[ \t]*(.*)/i", chop($currentHeader), $matches)
                && $matches[2] != "")
            {
                // look for :// in the target to see if hostname is included
                if (strpos($matches[2], "://") === false)
                {
                    // no host in the path, so prepend
                    $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                    // eliminate double slash
                    if (!preg_match("|^/|", $matches[2]))
                        $this->_redirectaddr .= "/".$matches[2];
                    else
                        $this->_redirectaddr .= $matches[2];
                }
                else
                    $this->_redirectaddr = $matches[2];
            }

            if (preg_match("|^HTTP/|", $currentHeader))
            {
                // keep status in step with response_code, as _httprequest does
                if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status))
                    $this->status = $status[1];
                $this->response_code = $currentHeader;
            }

            $this->headers[] = $currentHeader;
        }

        // check if there is a redirect meta tag
        if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match))
        {
            $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
        }

        // have we hit our frame depth and is there frame src to fetch?
        if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match))
        {
            $this->results[] = $results;
            for ($x = 0; $x < count($match[1]); $x++)
                $this->_frameurls[] = $this->_expandlinks($match[1][$x], $URI_PARTS["scheme"]."://".$this->host);
        }
        // have we already fetched framed content?
        elseif (is_array($this->results))
            $this->results[] = $results;
        // no framed content
        else
            $this->results = $results;

        return true;
    }

/*======================================================================*\
    Function:   setcookies()
    Purpose:    set cookies for a redirection
                (copies name=value pairs from the stored Set-Cookie
                response headers into $this->cookies)
\*======================================================================*/

    function setcookies()
    {
        // nothing to do before any response headers have been stored
        if (empty($this->headers) || !is_array($this->headers))
            return;

        foreach ($this->headers as $header)
        {
            // group 1: cookie name, group 2: value up to the first ";"
            if (preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $header, $match))
                $this->cookies[$match[1]] = urldecode($match[2]);
        }
    }

    
/*======================================================================*\
    Function:   _check_timeout
    Purpose:    checks whether timeout has occurred
    Input:      $fp     file pointer
    Output:     true if the stream reports a timeout (also sets
                $this->timed_out), false otherwise
\*======================================================================*/

    function _check_timeout($fp)
    {
        // only meaningful when a read timeout has been configured
        if ($this->read_timeout > 0) {
            $fp_status = socket_get_status($fp);
            // the key must be exactly "timed_out"; any padding in the key
            // would make this lookup miss and hide every timeout
            if (!empty($fp_status["timed_out"])) {
                $this->timed_out = true;
                return true;
            }
        }
        return false;
    }

/*======================================================================*\
    Function:   _connect
    Purpose:    make a socket connection
    Input:      $fp     file pointer (by reference; receives the
                        opened socket)
    Output:     true on success; false on failure, with $this->status
                set to the fsockopen error number and $this->error to
                a message
\*======================================================================*/

    function _connect(&$fp)
    {
        // connect to the proxy when both proxy host and port are set,
        // otherwise straight to the target host
        if (!empty($this->proxy_host) && !empty($this->proxy_port))
        {
            $this->_isproxy = true;

            $host = $this->proxy_host;
            $port = $this->proxy_port;
        }
        else
        {
            $host = $this->host;
            $port = $this->port;
        }

        $this->status = 0;

        $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
        if ($fp)
        {
            // socket connection succeeded
            return true;
        }

        // socket connection failed
        $this->status = $errno;
        switch ($errno)
        {
            // each case needs its own break; without it every branch falls
            // through and the generic default message always wins
            case -3:
                $this->error = "socket creation failed (-3)";
                break;
            case -4:
                $this->error = "dns lookup failure (-4)";
                break;
            case -5:
                $this->error = "connection refused or timed out (-5)";
                break;
            default:
                $this->error = "connection failed (".$errno.")";
        }
        return false;
    }
/*======================================================================*\
    Function:   _disconnect
    Purpose:    disconnect a socket connection
    Input:      $fp     file pointer
    Output:     the result of fclose() on the pointer
\*======================================================================*/

    function _disconnect($fp)
    {
        // close the handle and hand the outcome back to the caller
        $closed = fclose($fp);

        return $closed;
    }

    
/*======================================================================*\
    Function:   _prepare_post_body
    Purpose:    Prepare post body according to encoding type
    Input:      $formvars  - form variables
                $formfiles - form upload files
    Output:     post body as a string; nothing (null) when both inputs
                are empty; an empty string when $this->_submit_type is
                neither of the two handled encodings
\*======================================================================*/

    function _prepare_post_body($formvars, $formfiles)
    {
        settype($formvars, "array");
        settype($formfiles, "array");
        $postdata = '';

        if (count($formvars) == 0 && count($formfiles) == 0)
            return;

        switch ($this->_submit_type) {
            case "application/x-www-form-urlencoded":
                // key=value pairs joined by "&"; array/object values are
                // sent as repeated key[]=value pairs.
                // foreach instead of each(): each() no longer exists in PHP 8
                foreach ($formvars as $key => $val) {
                    if (is_array($val) || is_object($val)) {
                        foreach ($val as $cur_key => $cur_val) {
                            $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                        }
                    } else
                        $postdata .= urlencode($key)."=".urlencode($val)."&";
                }
                break;

            case "multipart/form-data":
                // fresh boundary per body; it is read back when the
                // Content-type request header is built
                $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

                foreach ($formvars as $key => $val) {
                    if (is_array($val) || is_object($val)) {
                        foreach ($val as $cur_key => $cur_val) {
                            $postdata .= "--".$this->_mime_boundary."\r\n";
                            $postdata .= "Content-Disposition: form-data; name=\"{$key}[]\"\r\n\r\n";
                            $postdata .= "$cur_val\r\n";
                        }
                    } else {
                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                        $postdata .= "$val\r\n";
                    }
                }

                foreach ($formfiles as $field_name => $file_names) {
                    // one field may carry a single path or a list of paths
                    settype($file_names, "array");
                    foreach ($file_names as $file_name) {
                        // unreadable files are skipped silently
                        if (!is_readable($file_name)) continue;

                        // file_get_contents also copes with empty files,
                        // where fread() with a zero length would fail
                        $file_content = file_get_contents($file_name);
                        if ($file_content === false) continue;
                        $base_name = basename($file_name);

                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                        $postdata .= "$file_content\r\n";
                    }
                }
                // closing boundary
                $postdata .= "--".$this->_mime_boundary."--\r\n";
                break;
        }

        return $postdata;
    }
}

?>

 

 

评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值