<?php
// Tell the client that the response body is HTML encoded as UTF-8.
header("Content-Type:text/html;charset=UTF-8");
/*
 * Fetches a web page with cURL.
 */
class Curl{
    /** @var array cURL options that exec() passes to curl_setopt_array(). */
    public $setopt;

    /** @var mixed Result of the most recent curl_exec() call (null before exec() runs). */
    public $data;

    /**
     * Prepare the option set for a request to the given URL.
     *
     * The transfer is configured to be returned as a value instead of being
     * printed, and to follow Location redirects.
     *
     * @param mixed $url Target URL; it is converted to a string.
     */
    public function __construct($url){
        $options = array();
        $options[CURLOPT_URL] = (string) $url;
        $options[CURLOPT_RETURNTRANSFER] = true;
        $options[CURLOPT_FOLLOWLOCATION] = true;
        $this->setopt = $options;
    }

    /**
     * Run the request with the stored options.
     *
     * The result is also kept in $this->data.
     *
     * @return mixed Whatever curl_exec() returned for this transfer.
     */
    public function exec(){
        $handle = curl_init();
        curl_setopt_array($handle, $this->setopt);
        $response = curl_exec($handle);
        curl_close($handle);

        $this->data = $response;
        return $response;
    }
}
/*
 * Regular-expression lookups on fetched HTML.
 *   id()        searches by id attribute
 *   tagName()   searches by tag name
 *   className() searches by class attribute
 *
 * The patterns are deliberately simple: "." does not cross newlines and the
 * quantifiers are greedy, so this is a rough text search, not an HTML parser.
 */
class Preg{
    /**
     * Find the first element whose opening tag has an id attribute containing $id.
     *
     * @param string $data HTML to search.
     * @param string $id   Id value to look for; treated as literal text.
     * @return string|null The whole matched markup, or null when nothing matches.
     */
    public function id($data,$id){
        $pattern = '/<(.*)\s*id=.*('.preg_quote($id,'/').').*>\s*(.*)\s*<\/(.*)>/';
        return $this->firstMatch($pattern,$data,0);
    }

    /**
     * Find the first <$tag ...>...</$tag> pair.
     *
     * @param string $data HTML to search.
     * @param string $tag  Tag name to look for; treated as literal text.
     * @return string|null The text captured between the opening and closing
     *                     tag, or null when nothing matches.
     */
    public function tagName($data,$tag){
        $quoted = preg_quote($tag,'/');
        $pattern = '/<'.$quoted.'.*>\s*(.*)\s*<\/'.$quoted.'>/';
        return $this->firstMatch($pattern,$data,1);
    }

    /**
     * Find the first element whose opening tag has a class attribute containing $class.
     *
     * @param string $data  HTML to search.
     * @param string $class Class name to look for; treated as literal text.
     * @return string|null The whole matched markup, or null when nothing matches.
     */
    public function className($data,$class){
        $pattern = '/<(.*)\s*class=.*('.preg_quote($class,'/').').*>\s*(.*)\s*<\/(.*)>/';
        return $this->firstMatch($pattern,$data,0);
    }

    /**
     * Run $pattern against $data and return one capture group of the first match.
     *
     * Returning null explicitly avoids reading an undefined array offset when
     * the pattern does not match or preg_match() fails.
     */
    private function firstMatch($pattern,$data,$group){
        if (preg_match($pattern,$data,$found) !== 1) {
            return null;
        }
        return isset($found[$group]) ? $found[$group] : null;
    }
}
// Demo: fetch the Baidu home page and print what Preg::tagName() extracts for "div".
$c = new Curl('http://www.baidu.com');
$data = $c->exec();
if ($data === false) {
    // curl_exec() returns false when the transfer fails; there is nothing to parse.
    exit('Failed to fetch http://www.baidu.com');
}
// Convert from GB2312 only when the payload is not already valid UTF-8;
// preg_match() with the "u" modifier succeeds only on well-formed UTF-8 input.
if (preg_match('//u', $data) !== 1) {
    // iconv() raises a notice and returns false on an illegal byte sequence.
    // The notice is suppressed so it does not leak into the page; the false
    // result is handled explicitly by keeping the unconverted data.
    $converted = @iconv("gb2312", "utf-8", $data);
    if ($converted !== false) {
        $data = $converted;
    }
}
$preg = new Preg();
echo $preg->tagName($data,'div');
// Reposted from: http://blog.csdn.net/qq435792305/article/details/8502027