namespace Xuyaoxiang;
/**
 * Minimal page scraper: downloads $url with cURL, converts the body to
 * $target_code when the page declares a different charset, and extracts one
 * fragment using a named regular expression.
 */
class Snoopy {
    /**
     * Named extraction rules. Capture group 1 of the selected pattern is the
     * value returned by get_pege_content().
     *
     * NOTE(review): the 'title' and 'description' rules used to contain no HTML
     * tags ('/\n(\s*.*)/i' and '//') and therefore could not extract those
     * fields. They are rebuilt here on the assumption that they target
     * <title> and <meta name="description" content="..."> — confirm.
     */
    private $pattern_array = [
        // "s" lets the title span several lines; the lazy quantifier stops at the first </title>.
        'title' => '/<title[^>]*>(.*?)<\/title>/is',
        // Branch reset (?|...) keeps the content in group 1 for both quote styles.
        // Only matches when the name attribute precedes the content attribute.
        'description' => '/<meta\b[^>]*\bname=["\']?description["\']?[^>]*\bcontent=(?|"([^"]*)"|\'([^\']*)\')/i',
        // Accepts charset=utf-8, charset="utf-8" and charset='utf-8'.
        'charset' => '/charset=["\']?([\w-]+)/i',
    ];

    /** URL of the page to download. */
    public $url;

    /** Target encoding the downloaded page is converted to. */
    public $target_code = "utf-8";

    /** User-Agent header sent with the request (imitates a desktop browser). */
    public $user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.36';

    /**
     * @param string $url URL of the page to download.
     */
    public function __construct($url)
    {
        $this->url = $url;
    }

    /**
     * Registers a new extraction rule. Existing rules can not be overwritten.
     *
     * @param string $key Rule name.
     * @param string $val PCRE pattern whose capture group 1 is the extracted value.
     * @return bool true when the rule was added, false when $key already exists.
     */
    public function set_pattern($key, $val)
    {
        if (isset($this->pattern_array[$key])) {
            return false;
        }
        $this->pattern_array[$key] = $val;
        return true;
    }

    /**
     * Downloads the page and returns the trimmed text captured by the rule
     * named $pattern_key.
     *
     * @param string $pattern_key Name of a rule in the pattern table.
     * @return string|false The captured text, or false when the key is empty or
     *                      unknown, the download fails, or the rule does not match.
     */
    public function get_pege_content($pattern_key)
    {
        // Reject bad keys before touching the network.
        if ($pattern_key == '' || !isset($this->pattern_array[$pattern_key])) {
            return false;
        }

        $data = $this->fetch();
        if ($data === false) {
            return false;
        }

        $data = $this->to_target_encoding($data);

        // preg_match() yields 0 on no match and false on an invalid (user supplied) pattern.
        if (preg_match($this->pattern_array[$pattern_key], $data, $content) !== 1 || !isset($content[1])) {
            return false;
        }

        return trim($content[1]);
    }

    /**
     * Performs the HTTP request and always releases the cURL handle.
     *
     * @return string|false Response body, or false on failure or an empty body.
     */
    private function fetch()
    {
        $curl = curl_init();
        if ($curl === false) {
            return false;
        }

        curl_setopt($curl, CURLOPT_URL, $this->url);
        // Do not include the response headers in the returned data.
        curl_setopt($curl, CURLOPT_HEADER, 0);
        // Return the body as a string instead of printing it.
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($curl, CURLOPT_USERAGENT, $this->user_agent);

        $data = curl_exec($curl);
        // Close before any return so the handle is released on failure too.
        curl_close($curl);

        return (is_string($data) && $data !== '') ? $data : false;
    }

    /**
     * Converts $data to $target_code when the page declares another charset.
     * The data is returned unchanged when no charset is declared, when it
     * already matches the target, or when the conversion fails.
     *
     * @param string $data Raw page body.
     * @return string
     */
    private function to_target_encoding($data)
    {
        if (preg_match($this->pattern_array['charset'], $data, $reg_charset) !== 1) {
            return $data;
        }

        $page_charset = strtolower($reg_charset[1]);
        if ($page_charset === strtolower($this->target_code)) {
            return $data;
        }

        try {
            $converted = mb_convert_encoding($data, $this->target_code, $page_charset);
        } catch (\ValueError $e) {
            // PHP 8 throws for an encoding name mbstring does not know.
            return $data;
        }

        // Older PHP versions signal the same problem by returning false.
        return is_string($converted) ? $converted : $data;
    }
}
// Example usage: download the QQ home page and print the text extracted by the 'title' rule.
$snoopy = new Snoopy("http://www.qq.com");
$title = $snoopy->get_pege_content('title');
if ($title === false) {
    // get_pege_content() returns false on failure; print_r(false) would print nothing at all.
    echo 'Failed to fetch or extract the page title', PHP_EOL;
} else {
    print_r($title);
}