思路:
1、 首先是采用curl的方式采集到这个网页的内容
2、 用正则的方式或者html解析器把url分析出来
3、 对于每一个url,进行请求,如果状态不是2xx、3xx等就定义为异常。
打个小广告:四脚猫视频下载,四脚猫培训视频下载,四脚猫php视频下载,四脚猫php百度云视频下载,最新版下载与官网同步,请联系本人V43599939
下载地址:=》四脚猫最新php视频下载地址
<?php
/**
 * Link checker: downloads a page, extracts the href of every <a> tag and
 * records the HTTP status code that each link answers with.
 *
 * All network access goes through the cURL extension.
 */
class http_stat{
    /** @var string URL of the page scanned by the last successful call. */
    public $url;
    /** @var string Raw body of the page scanned by the last successful call. */
    private $document;
    /** @var array href values extracted from $document, in document order. */
    private $links;
    /** Not read or written anywhere in this class. */
    private $domain;
    /** Not read or written anywhere in this class. */
    private $links_stat;

    /**
     * Fetch $url, probe every link found in it and group the links by result.
     *
     * @param string $url Absolute URL of the page to scan.
     * @return array|null Map of HTTP status code => list of href values (as
     *                    written in the page) that produced that code; status 0
     *                    means cURL got no HTTP response at all. The extra key
     *                    'empty' holds the number of bare "#" links and is only
     *                    present when at least one was found. Returns null when
     *                    the page itself could not be downloaded.
     */
    public function get_all_link_status($url){
        // Download once and reuse the body (fetching twice doubles the traffic
        // and can even yield two different documents).
        $document = $this->__get_document($url);
        if($document === false){
            return null;
        }

        $this->url = $url;
        $this->document = $document;
        $this->links = $this->__strip_links($document);

        // Always start from an array so a page without links yields array()
        // instead of an undefined variable.
        $res = array();
        foreach($this->links as $val){
            if($val === '#'){
                if(!isset($res['empty'])){
                    $res['empty'] = 0;
                }
                $res['empty'] += 1;
                continue;
            }
            $state_num = $this->__get_http_status($this->__resolve_url($val));
            $res[$state_num][] = $val;
        }
        return $res;
    }

    /**
     * Turn an href taken from the scanned page into an absolute URL.
     *
     * Handles absolute http(s) URLs, protocol-relative ("//host/x"),
     * root-relative ("/x"), query-only ("?x"), fragment-only ("#x") and
     * path-relative ("x/y") references. Dot segments ("../") are left in
     * place for the HTTP client to deal with.
     *
     * @param string $href Link target exactly as found in the document.
     * @return string URL to request.
     */
    private function __resolve_url($href){
        // Only a leading scheme makes a link absolute; "http" appearing later
        // (e.g. inside a query string) must not count.
        if(preg_match('~^https?://~i', $href)){
            return $href;
        }
        // Split the page URL into "scheme://authority" and its path. If the
        // page URL has an unexpected shape, fall back to plain concatenation.
        if($href === '' || !preg_match('~^([a-z][a-z0-9+.\-]*://[^/?#]+)([^?#]*)~i', $this->url, $m)){
            return $this->url.$href;
        }
        $origin = $m[1];
        $path = ($m[2] === '') ? '/' : $m[2];

        if(strpos($href, '//') === 0){
            // Protocol-relative: reuse the scheme of the scanned page.
            return substr($origin, 0, strpos($origin, ':')).':'.$href;
        }
        if($href[0] === '/'){
            return $origin.$href;
        }
        if($href[0] === '?'){
            return $origin.$path.$href;
        }
        if($href[0] === '#'){
            // Same document: keep path and query, swap only the fragment.
            return preg_replace('/#.*$/s', '', $this->url).$href;
        }
        // Path-relative: resolve against the directory of the scanned page.
        return $origin.substr($path, 0, strrpos($path, '/') + 1).$href;
    }

    /**
     * Issue a header-only request and return the HTTP status code.
     *
     * @param string $s_url Absolute URL to probe.
     * @return int Status code reported by cURL; 0 when no response was received.
     */
    private function __get_http_status($s_url){
        $curl = curl_init();
        curl_setopt($curl,CURLOPT_URL,$s_url);
        curl_setopt($curl,CURLOPT_HEADER,1);
        curl_setopt($curl,CURLOPT_NOBODY,1);
        curl_setopt($curl,CURLOPT_RETURNTRANSFER,1);
        curl_setopt($curl,CURLOPT_TIMEOUT,30);
        curl_exec($curl);
        $rtn= curl_getinfo($curl,CURLINFO_HTTP_CODE);
        curl_close($curl);
        return $rtn;
    }

    /**
     * Download the body of $url.
     *
     * On a transfer error the cURL error message is printed and false is
     * returned.
     *
     * @param string $url Absolute URL to download.
     * @return string|false Response body, or false on a cURL error.
     */
    private function __get_document($url){
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        // Same limit as __get_http_status so a stalled server cannot hang the scan.
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);
        $response = curl_exec($ch);
        if(curl_errno($ch))
        {
            print curl_error($ch);
            // Release the handle on the failure path as well.
            curl_close($ch);
            return false;
        }
        curl_close($ch);
        return $response;
    }

    /**
     * Extract the href value of every <a> tag in an HTML document.
     *
     * Accepts single- and double-quoted attribute values and tags that span
     * several lines. Surrounding whitespace is trimmed and empty values are
     * skipped.
     *
     * @param string $document HTML source.
     * @return array List of href strings in document order (possibly empty).
     */
    private function __strip_links($document){
        $match = array();
        $found = preg_match_all(
            '/<a\b[^>]*?\shref\s*=\s*(["\'])(.*?)\1/is',
            (string)$document,
            $links
        );
        if(!$found){
            return $match;
        }
        // foreach replaces each(), which no longer exists in PHP 8.
        foreach($links[2] as $val){
            $val = trim($val);
            // Compare against '' rather than empty() so href="0" is kept.
            if($val !== ''){
                $match[] = $val;
            }
        }
        return $match;
    }
}
// Demo run: scan the Sina home page and dump the links grouped by status code.
$checker = new http_stat();
$report = $checker->get_all_link_status("http://www.sina.com.cn");
var_dump($report);