exdown.php,DedeHttpDown PHP远程下载网页的类,增强版 2013-1-17修改_PHP教程

/**

* 织梦HTTP下载类

*

* @version $Id: dedehttpdown.class.php 1 11:42 2010年7月6日Z tianya $

* @package DedeCMS.Libraries

* @copyright Copyright (c) 2007 - 2010, DesDev, Inc.

* @modify author admin@zbphp.com

* @license http://help.dedecms.com/usersguide/license.html

* @link http://www.dedecms.com

*/

// Lift PHP's script execution time limit (0 = no limit) so long downloads are not cut off;
// the @ hides the warning PHP raises when set_time_limit() is disabled on the host.
@set_time_limit(0);

/**
 * Socket-based HTTP client used to download remote pages and files.
 *
 * Typical use: OpenUrl() connects, sends the request and parses the response
 * head; GetHtml(), SaveToBin() or SaveToText() then consume the body from the
 * still-open socket. Request headers stored in $m_puthead (see SetHead())
 * persist across requests made with the same instance.
 *
 * The class keeps its legacy PHP 4 style API (public "var" properties and
 * methods without visibility keywords) so existing callers keep working.
 */
class DedeHttpDown
{
    // Full URL of the current request, exactly as given to PrivateInit().
    var $m_url = '';
    // Path plus "?query" as written on the request line.
    var $m_urlpath = '';
    // URL components filled in by PrivateInit().
    var $m_scheme = 'http';
    var $m_host = '';
    var $m_port = '80';
    var $m_user = '';
    var $m_pass = '';
    var $m_path = '/';
    var $m_query = '';
    // Open socket resource, or null when no connection is open.
    var $m_fp = null;
    // Human-readable description of the last failure(s).
    var $m_error = '';
    // Response headers keyed by lower-case name, plus the pseudo keys
    // "http-edition", "http-state" and "http-describe" from the status line.
    // Must be an array: it is indexed by name and iterated.
    var $m_httphead = array();
    // Body text cached by GetHtml().
    var $m_html = '';
    // GetHtml() stops reading once the body exceeds this many bytes (0 = no limit).
    var $dataLimit = 0;
    // Request headers to send, keyed by header name. Must be an array (see $m_httphead).
    var $m_puthead = array();
    // Host plus directory of the current URL (no scheme, no trailing slash); base for FillUrl().
    var $BaseUrlPath = '';
    // Host of the current URL; base for root-relative links in FillUrl().
    var $HomeUrl = '';
    // Number of connection attempts made for the current request.
    var $reTry = 0;
    // Number of redirects followed for the current request.
    var $JumpCount = 0;

    /**
     * Split a URL into its components and store them on the instance.
     *
     * An empty or unparsable URL leaves the components untouched.
     *
     * @access public
     * @param  string $url URL to download
     * @return void
     */
    function PrivateInit($url)
    {
        if ($url == '') {
            return;
        }
        $this->m_url = $url;
        $urls = parse_url($url);
        if (!is_array($urls)) {
            return;
        }

        // A URL without a host (e.g. a bare path) must not raise a notice.
        $this->m_host = isset($urls["host"]) ? $urls["host"] : '';

        $simpleParts = array(
            'scheme' => 'm_scheme',
            'user'   => 'm_user',
            'pass'   => 'm_pass',
            'path'   => 'm_path',
            'query'  => 'm_query',
        );
        foreach ($simpleParts as $part => $property) {
            if (isset($urls[$part]) && $urls[$part] !== '') {
                $this->$property = $urls[$part];
            }
        }

        if (!empty($urls["port"])) {
            $this->m_port = $urls["port"];
        } else if ($this->PrivateIsHttps()) {
            // https URLs without an explicit port must not fall back to port 80.
            $this->m_port = '443';
        }

        $this->m_urlpath = $this->m_path;
        if (isset($urls["query"]) && $urls["query"] !== '') {
            $this->m_urlpath .= "?".$this->m_query;
        }

        $this->HomeUrl = $this->m_host;
        $this->BaseUrlPath = $this->HomeUrl.(isset($urls["path"]) ? $urls["path"] : '');
        // Drop a trailing "file.ext" segment. Only the LAST segment is examined so that a
        // dot inside a directory name (e.g. "/v1.2/list") does not wipe out the whole path.
        $this->BaseUrlPath = preg_replace("/\/[^\/]*\.[^\/]*$/", "/", $this->BaseUrlPath);
        $this->BaseUrlPath = preg_replace("/\/$/", "", $this->BaseUrlPath);
    }

    /**
     * Reset the URL components and the error text to their defaults.
     *
     * @access public
     * @return void
     */
    function ResetAny()
    {
        $this->m_url = "";
        $this->m_urlpath = "";
        $this->m_scheme = "http";
        $this->m_host = "";
        $this->m_port = "80";
        $this->m_user = "";
        $this->m_pass = "";
        $this->m_path = "/";
        $this->m_query = "";
        $this->m_error = "";
    }

    /**
     * Open a URL: connect, send the request and parse the response head.
     *
     * Note that this resets $dataLimit to 0, so a limit has to be set after this call.
     *
     * @access public
     * @param  string $url         URL to request
     * @param  string $requestType "GET" or "POST"
     * @return bool   TRUE when a complete response head was received
     */
    function OpenUrl($url,$requestType="GET")
    {
        $this->ResetAny();
        $this->JumpCount = 0;
        $this->m_httphead = array();
        $this->m_html = '';
        $this->dataLimit = 0;
        $this->reTry = 0;
        $this->Close();
        $this->PrivateInit($url);
        return $this->PrivateStartSession($requestType);
    }

    /**
     * Follow a redirect: like OpenUrl() but always GET, and the redirect counter is
     * incremented instead of reset.
     *
     * @access public
     * @param  string $url redirect target
     * @return bool   TRUE when a complete response head was received
     */
    function JumpOpenUrl($url)
    {
        $this->ResetAny();
        $this->JumpCount++;
        $this->m_httphead = array();
        $this->m_html = "";
        $this->Close();
        $this->PrivateInit($url);
        return $this->PrivateStartSession('GET');
    }

    /**
     * Print the stored error text followed by every response header.
     *
     * @access public
     * @return void
     */
    function printError()
    {
        echo "错误信息:".$this->m_error;
        echo "\n\n具体返回头:\n\n";
        foreach ($this->m_httphead as $k => $v) {
            echo "$k => $v\n\n\r\n";
        }
    }

    /**
     * Tell whether the response status is 2xx; otherwise append the status to $m_error.
     *
     * @access public
     * @return bool
     */
    function IsGetOK()
    {
        if (preg_match("/^2/", $this->GetHead("http-state"))) {
            return TRUE;
        }
        $this->m_error .= $this->GetHead("http-state")." - ".$this->GetHead("http-describe")."\n\n";
        return FALSE;
    }

    /**
     * Tell whether the response is textual: status 2xx or 401 and a content type
     * containing "text", "xml" or "json". Otherwise a message is appended to $m_error.
     *
     * @access public
     * @return bool
     */
    function IsText()
    {
        $statusOk = preg_match("/^(2|401)/", $this->GetHead("http-state"));
        $typeOk   = preg_match("/text|xml|json/i", $this->GetHead("content-type"));
        if ($statusOk && $typeOk) {
            return TRUE;
        }
        $this->m_error .= "内容为非文本类型或网址重定向\n\n";
        return FALSE;
    }

    /**
     * Tell whether the response is 2xx and has the given content type.
     *
     * The comparison ignores case and accepts either the full header value or just the
     * media type in front of any ";charset=..." parameter.
     *
     * @access public
     * @param  string $ctype expected content type, e.g. "text/html"
     * @return bool
     */
    function IsContentType($ctype)
    {
        $actual    = strtolower(trim($this->GetHead("content-type")));
        $expected  = strtolower(trim($ctype));
        $pieces    = explode(";", $actual, 2);
        $mediaType = trim($pieces[0]);

        if (preg_match("/^2/", $this->GetHead("http-state"))
            && ($actual == $expected || $mediaType == $expected)) {
            return TRUE;
        }
        $this->m_error .= "类型不对 ".$this->GetHead("content-type")."\n\n";
        return FALSE;
    }

    /**
     * Stream the response body from the open socket into a file.
     *
     * @access public
     * @param  string $savefilename destination file name
     * @return bool   TRUE when the whole body was written
     */
    function SaveToBin($savefilename)
    {
        if (!$this->IsGetOK()) {
            return FALSE;
        }
        if (!is_resource($this->m_fp) || feof($this->m_fp)) {
            $this->m_error = "连接已经关闭!";
            return FALSE;
        }
        // "b" keeps the payload byte-exact on platforms that translate line endings.
        $out = fopen($savefilename, "wb");
        if (!$out) {
            $this->m_error = "无法写入文件 ".$savefilename;
            return FALSE;
        }

        $complete = TRUE;
        while (!feof($this->m_fp)) {
            $chunk = fread($this->m_fp, 1024);
            if ($chunk === FALSE) {
                $complete = FALSE;
                break;
            }
            if ($chunk !== '') {
                fwrite($out, $chunk);
            }
            // A stalled peer never reaches EOF; without this check the loop would spin forever.
            $meta = stream_get_meta_data($this->m_fp);
            if (!empty($meta['timed_out'])) {
                $complete = FALSE;
                break;
            }
        }
        $this->Close();
        fclose($out);
        if (!$complete) {
            $this->m_error = "下载超时或连接中断!";
        }
        return $complete;
    }

    /**
     * Save the response body to a file, but only when the response is textual.
     *
     * @access public
     * @param  string $savefilename destination file name
     * @return bool|string result of SaveToBin(), or "" when the response is not text
     */
    function SaveToText($savefilename)
    {
        if (!$this->IsText()) {
            return "";
        }
        return $this->SaveToBin($savefilename);
    }

    /**
     * Read the response body as text.
     *
     * The body is read once and cached in $m_html; reading stops early once it exceeds
     * $dataLimit bytes (when $dataLimit > 0). The socket is closed afterwards.
     *
     * @access public
     * @return string body text, or '' when the response is not text or nothing can be read
     */
    function GetHtml()
    {
        $tm1 = microtime(true);
        if (!$this->IsText()) {
            return '';
        }
        if ($this->m_html != '') {
            return $this->m_html;
        }
        if (!is_resource($this->m_fp) || feof($this->m_fp)) {
            return '';
        }
        while (!feof($this->m_fp)) {
            $chunk = fgets($this->m_fp, 256);
            if ($chunk === FALSE) {
                // Read error or read timeout: stop instead of looping on a stalled socket.
                break;
            }
            $this->m_html .= $chunk;
            if ($this->dataLimit > 0 && strlen($this->m_html) > $this->dataLimit) {
                break;
            }
        }
        $this->Close();

        $tm2 = microtime(true);
        $log = "\ntm2-tm1 = ".($tm2-$tm1);
        $log.= "\n".$this->m_html;
        $this->log_write('GetHtml',$log);
        return $this->m_html;
    }

    /**
     * Run one request/response exchange: connect, send the request, parse the response
     * head, retry on a broken connection and follow 3xx redirects.
     *
     * @access public
     * @param  string $requestType "GET" or "POST"
     * @return bool   TRUE when a complete response head was received
     */
    function PrivateStartSession($requestType="GET")
    {
        // Release the socket of a previous attempt so retries do not leak connections.
        $this->Close();
        if (!$this->PrivateOpenHost()) {
            $this->m_error .= "打开远程主机出错!";
            return FALSE;
        }
        $this->reTry++;

        // On a retry, answer with the protocol version the server used last time.
        $httpv = ($this->GetHead("http-edition") == "HTTP/1.1") ? "HTTP/1.1" : "HTTP/1.0";
        fwrite($this->m_fp, $this->PrivateBuildRequest($requestType, $httpv));

        if (!$this->PrivateReadResponseHead()) {
            // The connection broke before the head was complete: try again, at most 10 times.
            if ($this->reTry > 10) {
                return FALSE;
            }
            // Return here so the redirect handling below never runs a second time
            // on the state produced by the nested attempt.
            return $this->PrivateStartSession($requestType);
        }

        if (preg_match("/^3/", $this->m_httphead["http-state"])) {
            if ($this->JumpCount > 3) {
                return FALSE;
            }
            if (!isset($this->m_httphead["location"])) {
                $this->m_error = "无法识别的答复!";
                return FALSE;
            }
            $newurl = $this->m_httphead["location"];
            if (!preg_match("/^https?:\/\//i", $newurl)) {
                $newurl = $this->FillUrl($newurl);
            }
            return $this->JumpOpenUrl($newurl);
        }
        return TRUE;
    }

    /**
     * Build the complete request text (request line, headers, blank line, optional body).
     *
     * For POST the part of the URL after "?" is sent as the form body ("OK" when there is
     * no query string).
     *
     * @access private
     * @param  string $requestType "GET" or "POST"
     * @param  string $httpv       protocol version token, e.g. "HTTP/1.0"
     * @return string
     */
    function PrivateBuildRequest($requestType, $httpv)
    {
        $ps = explode('?', $this->m_urlpath);
        if ($requestType == "GET") {
            $head = "GET ".$this->m_urlpath." $httpv\r\n";
        } else {
            $head = "POST ".$ps[0]." $httpv\r\n";
        }
        if ($this->m_user || $this->m_pass) {
            $head .= "Authorization: Basic ".base64_encode($this->m_user.":".$this->m_pass)."\r\n";
        }

        // The Host header carries the port whenever it is not the scheme's default.
        $defaultPort = $this->PrivateIsHttps() ? 443 : 80;
        $hostHeader = $this->m_host;
        if ((int)$this->m_port != $defaultPort) {
            $hostHeader .= ":".$this->m_port;
        }
        $this->m_puthead["Host"] = $hostHeader;

        if (!isset($this->m_puthead["User-Agent"])) {
            $this->m_puthead["User-Agent"] = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2)";
        }
        // "Refer" is the key callers of this class have always used; it is sent under
        // the real header name "Referer" below.
        if (!isset($this->m_puthead["Refer"]) && !isset($this->m_puthead["Referer"])) {
            $scheme = $this->PrivateIsHttps() ? "https" : "http";
            $this->m_puthead["Refer"] = $scheme."://".$this->m_puthead["Host"];
        }

        if (!isset($this->m_puthead["Accept-Language"])) {
            $head .= "Accept-Language:zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3\r\n";
        }
        if (!isset($this->m_puthead["Accept"])) {
            $head .= "Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n";
        }

        foreach ($this->m_puthead as $k => $v) {
            $k = trim($k);
            $v = trim($v);
            if ($k == "" || $v == "") {
                continue;
            }
            if (strtolower($k) == "connection") {
                // The body is read until EOF, so the connection policy is fixed below.
                continue;
            }
            if ($k == "Refer") {
                if (isset($this->m_puthead["Referer"])) {
                    continue;
                }
                $k = "Referer";
            }
            $head .= "$k: $v\r\n";
        }

        $body = "";
        if ($requestType == "POST") {
            $body = (count($ps) > 1) ? implode('', array_slice($ps, 1)) : "OK";
            $head .= "Content-Type: application/x-www-form-urlencoded\r\n";
            $head .= "Content-Length: ".strlen($body)."\r\n";
        }

        // Always ask the server to close the connection: the readers rely on feof(), and
        // a kept-alive connection would block them until the server's idle timeout.
        $head .= "Connection: close\r\n\r\n";
        return $head.$body;
    }

    /**
     * Read the status line and the headers from the socket into $m_httphead.
     *
     * @access private
     * @return bool TRUE when the head was complete (terminated by the blank line),
     *              FALSE when the connection ended or timed out before that
     */
    function PrivateReadResponseHead()
    {
        $statusLine = fgets($this->m_fp, 8192);
        if ($statusLine === FALSE) {
            return FALSE;
        }
        $httpstas = explode(" ", $statusLine);
        if (count($httpstas) < 2) {
            return FALSE;
        }
        $this->m_httphead["http-edition"] = trim($httpstas[0]);
        $this->m_httphead["http-state"] = trim($httpstas[1]);
        $this->m_httphead["http-describe"] = "";
        foreach (array_slice($httpstas, 2) as $word) {
            $this->m_httphead["http-describe"] .= " ".trim($word);
        }

        while (!feof($this->m_fp)) {
            $raw = fgets($this->m_fp, 8192);
            if ($raw === FALSE) {
                break;
            }
            $line = trim($raw);
            if ($line == "") {
                // The blank line ends the header block.
                return TRUE;
            }
            // Split on the FIRST colon only; values such as times contain colons themselves.
            $pair = explode(":", $line, 2);
            $hkey = trim($pair[0]);
            if ($hkey != "") {
                $this->m_httphead[strtolower($hkey)] = isset($pair[1]) ? trim($pair[1]) : "";
            }
        }
        return FALSE;
    }

    /**
     * Get a response header value (header names are case-insensitive).
     *
     * @access public
     * @param  string $headname header name
     * @return string value, or '' when the header is absent
     */
    function GetHead($headname)
    {
        $headname = strtolower($headname);
        return isset($this->m_httphead[$headname]) ? $this->m_httphead[$headname] : '';
    }

    /**
     * Set a request header that is sent with every following request.
     *
     * @access public
     * @param  string $skey   header name
     * @param  string $svalue header value
     * @return void
     */
    function SetHead($skey,$svalue)
    {
        $this->m_puthead[$skey] = $svalue;
    }

    /**
     * Open the socket to the current host and port.
     *
     * @access public
     * @return bool TRUE on success; on failure $m_error holds the reason
     */
    function PrivateOpenHost()
    {
        if ($this->m_host == "") {
            return FALSE;
        }
        if (!$this->PrivateHostResolves($this->m_host)) {
            $this->m_error = '远程主机'.$this->m_host.'不存在!checkdnsrr !';
            return FALSE;
        }

        $errno = 0;
        $errstr = "";
        $target = $this->PrivateIsHttps() ? "ssl://".$this->m_host : $this->m_host;
        $this->m_fp = @fsockopen($target, (int)$this->m_port, $errno, $errstr, 10);
        if (!$this->m_fp) {
            $this->m_fp = null;
            $this->m_error = $errstr;
            return FALSE;
        }
        // fsockopen()'s timeout only covers connecting; reads need their own timeout,
        // otherwise a silent peer blocks until the script itself is killed.
        stream_set_timeout($this->m_fp, 10);
        return TRUE;
    }

    /**
     * Cheap pre-check that a host name can be resolved, to avoid a slow connect attempt.
     *
     * @access private
     * @param  string $host host name or IP literal
     * @return bool
     */
    function PrivateHostResolves($host)
    {
        // IP literals have no DNS records to look up.
        if (filter_var(trim($host, '[]'), FILTER_VALIDATE_IP) !== FALSE) {
            return TRUE;
        }
        if (!function_exists('checkdnsrr')) {
            return TRUE;
        }
        if (checkdnsrr($host, 'A') || checkdnsrr($host, 'AAAA') || checkdnsrr($host, 'CNAME')) {
            return TRUE;
        }
        // Names defined only in the hosts file (e.g. "localhost") have no DNS records either;
        // gethostbyname() returns its argument unchanged when resolution fails.
        return gethostbyname($host) != $host;
    }

    /**
     * Tell whether the current URL uses the https scheme.
     *
     * @access private
     * @return bool
     */
    function PrivateIsHttps()
    {
        return strtolower($this->m_scheme) == 'https';
    }

    /**
     * Close the socket if one is open. Safe to call at any time.
     *
     * @access public
     * @return void
     */
    function Close()
    {
        // is_resource() is FALSE for both "never opened" and "already closed";
        // calling fclose() in those states raises a TypeError on PHP 8.
        if (is_resource($this->m_fp)) {
            fclose($this->m_fp);
        }
        $this->m_fp = null;
    }

    /**
     * Turn a link found on the current page into an absolute URL.
     *
     * Handles absolute URLs, protocol-relative ("//host/x"), root-relative ("/x"),
     * "./x", "../x" and plain relative links, using the host and directory recorded by
     * PrivateInit(). Repeated slashes after the scheme are collapsed.
     *
     * @access public
     * @param  string $surl link to complete
     * @return string absolute URL, or "" when the link is empty or cannot be resolved
     */
    function FillUrl($surl)
    {
        $surl = trim($surl);
        if ($surl == "") {
            return "";
        }
        $pos = strpos($surl, "#");
        if ($pos > 0) {
            $surl = substr($surl, 0, $pos);
        }

        $scheme = $this->PrivateIsHttps() ? "https" : "http";

        if (preg_match("/^https?:\/\//i", $surl)) {
            $okurl = $surl;
        } else if (substr($surl, 0, 2) == "//") {
            $okurl = $scheme.":".$surl;
        } else if ($surl[0] == "/") {
            $okurl = $scheme."://".$this->HomeUrl.$surl;
        } else if ($surl[0] == ".") {
            if (strlen($surl) <= 1) {
                return "";
            }
            if ($surl[1] == "/") {
                $okurl = $scheme."://".$this->BaseUrlPath."/".substr($surl, 2);
            } else {
                // Count the ".." segments and keep the remaining ones as the tail.
                $segments = explode("/", $surl);
                $last = count($segments) - 1;
                $pathStep = 0;
                $dstr = "";
                foreach ($segments as $idx => $segment) {
                    if ($segment == "..") {
                        $pathStep++;
                    } else if ($idx < $last) {
                        $dstr .= $segment."/";
                    } else {
                        $dstr .= $segment;
                    }
                }
                // Climb one base component per ".."; climbing past the host is an error.
                $baseParts = explode("/", $this->BaseUrlPath);
                if (count($baseParts) <= $pathStep) {
                    return "";
                }
                $kept = array_slice($baseParts, 0, count($baseParts) - $pathStep);
                $okurl = $scheme."://".implode("/", $kept)."/".$dstr;
            }
        } else {
            $okurl = $scheme."://".$this->BaseUrlPath."/".$surl;
        }

        // Collapse duplicate slashes everywhere except in the "scheme://" prefix.
        $outScheme = $scheme;
        if (preg_match("/^(https?):\/\//i", $okurl, $m)) {
            $outScheme = strtolower($m[1]);
            $okurl = substr($okurl, strlen($m[0]));
        }
        $okurl = preg_replace("/\/{1,}/", "/", $okurl);
        return $outScheme."://".$okurl;
    }

    /**
     * Append a debug record to a per-request log file under DEDEDATA/httpdownlog.
     *
     * Does nothing unless the DEBUG_LEVEL constant is defined and true. Logging is
     * best-effort: a failure to create or open the log never aborts the download.
     *
     * @access public
     * @param  string $funcname name of the calling method (used in the file name)
     * @param  string $message  text to record
     * @return bool|null TRUE when written, FALSE when the log could not be written,
     *                   null when debugging is off
     */
    function log_write($funcname,$message)
    {
        if (!(defined('DEBUG_LEVEL') && DEBUG_LEVEL == TRUE)) {
            return;
        }
        if (!defined('DEDEDATA')) {
            return FALSE;
        }
        $log = "\n".date("Y-M-d H:i:s ").get_current_user()."[".getmypid()."]";
        $log.= "\n".$this->m_url."\n".str_repeat('------', 10)."\n".$message;

        $path = $funcname.' '.date('Y m d H i s ').preg_replace('/([\W]+|\s+)/i', ' ', $this->m_url);
        if (strlen($path) > 250) {
            $path = substr($path,0,250);
        }
        $dir = DEDEDATA.'/httpdownlog';
        // The last is_dir() covers another process creating the directory concurrently.
        if (!is_dir($dir) && !mkdir($dir) && !is_dir($dir)) {
            return FALSE;
        }
        $path = $dir.'/'.$path;
        if (!file_exists($path)) {
            touch($path);
        }
        $fp = fopen($path,'a+');
        if (!$fp) {
            return FALSE;
        }
        flock($fp, LOCK_EX);
        fputs($fp, "PATH:".$path."\nREAL:".realpath($path)."\nMSSG:".$log);
        fclose($fp);
        return TRUE;
    }
}//End Class

使用方法:

[php]

$dhd = new DeDeHttpDown();
// Request headers must be set BEFORE OpenUrl(): OpenUrl() sends the request immediately,
// so a header assigned afterwards never reaches the server.
$dhd->SetHead("Refer", $Rs['wurl']);
$dhd->OpenUrl($Rs['wurl']);
// dataLimit must be set AFTER OpenUrl(), because OpenUrl() resets it to 0;
// it is only read later, while GetHtml() pulls the body from the socket.
$dhd->dataLimit = 5120;
$filecnt = trim($dhd->GetHtml());

存在未解决的问题:

(1)假如域名是绑定了A记录和CNAME,有ip指向,但是IP地址是不存在的或者虚假的,程序仍旧会继续获取。

(2)PHP 的 fsockopen 里面的 timeout 貌似根本就没有起作用:设置了 10s 超时,但程序仍会一直执行到脚本自身超时为止。(原因:fsockopen 的 timeout 参数只作用于建立连接阶段,读取数据的超时需要用 stream_set_timeout() 另行设置。)

如果有好的方法或建议,可以随时联系我本人:admin@zbphp.com

http://www.bkjia.com/PHPjc/477811.htmlwww.bkjia.comtruehttp://www.bkjia.com/PHPjc/477811.htmlTechArticle1)新增远程主机判断,节约服务器资源。避免远程主机不存在的时候仍旧fsockopen,导致的死机占用服务器CPU (2)新增响应401的判断和支持...

本文原创发布php中文网,转载请注明出处,感谢您的尊重!

点击阅读全文7805e9cec91e05906ba862b2a92dad71.png

相关课程推荐

ac8f83ee679a6c85aa8428b3841e5e50.png

《javascript初级视频教程》是JavaScript的入门课程,旨在让大家认识,了解JavaScript的常见概念

c02f4679921bb67cd8e0cbd84e1757b6.png

jQuery 很容易学习,希望通过我们的《jquery 基础视频教程》可以帮助大家来更好的学习jQuery。 jQuery 是一个 JavaScript 库,简化了 JavaScript 编程。

jQuery教程36261次播放

fe204f986d83f07660db879925c67445.png

《javascript三级联动视频教程》介绍了javascript开发的三级联动功能,该功能在日常使用中还是经常能用的到的一个。

4920b024b54bdab948415e318dafd0e2.png

javascript是运行在浏览器上的脚本语言,连续多年,被评为全球最受欢迎的编程语言。是前端开发必备三大法器中,最具杀伤力。如果前端开发是降龙十八掌,好么javascript就是第18掌:亢龙有悔。没有它,你的前端生涯是不完整的。《php.cn独孤九贱(3)-JavaScript视频教程》课程特色:php中文网原创幽默段子系列课程,以恶搞,段子为主题风格的php视频教程!轻松的教学风格,简短的教学模式,让同学们在不知不觉中,学会了javascript知识。

d1665cebd52d950b4c26b8cedf069099.png

jQuery是一个快速、简洁的JavaScript框架。设计的宗旨是“write Less,Do More”,即倡导写更少的代码,做更多的事情。它封装JavaScript常用的功能代码,提供一种简便的JavaScript设计模式,优化HTML文档操作、事件处理、动画设计和Ajax交互。

核心特性可以总结为:具有独特的链式语法和短小清晰的多功能接口;具有高效灵活的css选择器,并且可对CSS选择器进行扩展;拥有便捷的插件扩展机制和丰富的插件。兼容各种主流浏览器,如IE 6.0+、FF 1.5+、Safari 2.0+、Opera 9.0+等,是全球最流行的前端开发框架之一。PHP中文网根据最新版本,独家录制jQuery最新视频教程,回馈PHP中文网的新老用户。

jQuery教程83412次播放

全部评论我要评论

取消发布评论发送

发布

1/1

303361.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值