1. 截取页面的 PHP 类文件:先获取 URL 对应的 HTML 内容,再用正则表达式从 HTML 内容中匹配所需的文本。
include 'StringBuilder.php';
/**
 * Fetches the text of a page and extracts parts of it with regular expressions.
 */
class CutPage
{
    public function __construct()
    {
    }

    /**
     * Reads the whole resource behind $url and returns it as a single string.
     *
     * The script is terminated through die() when the resource cannot be
     * opened, exactly as before.
     *
     * @param string $url Path or URL that fopen() can open for reading.
     * @return string Everything that could be read from the resource.
     */
    public function getAllContent($url)
    {
        $handle = fopen($url, 'r');
        if ($handle === false) {
            die("文件打开失败!");
        }

        $content = '';
        // Loop on the result of fgets() rather than on feof(): a read error
        // before EOF makes fgets() return false while feof() stays false,
        // which would otherwise spin forever.
        while (($line = fgets($handle, 4096)) !== false) {
            $content .= $line;
        }
        fclose($handle);

        return $content;
    }

    /**
     * Returns the text captured by the first group of $regex_title.
     *
     * The pattern is expected to have the shape "xxxx(xx)xxx", i.e. to
     * contain one capturing group around the title. On failure the message
     * "匹配失败!" is echoed and null is returned.
     *
     * @param string $content     Text to search.
     * @param string $regex_title PCRE pattern, delimiters included.
     * @return string|null Captured title, or null when nothing was captured.
     */
    public function matchContentTitle($content, $regex_title)
    {
        // A single preg_match() already yields the capture group; there is no
        // need to split the whole document a second time.
        if (preg_match($regex_title, $content, $matches) === 1 && isset($matches[1])) {
            return $matches[1];
        }

        echo "匹配失败!";
        return null;
    }

    /**
     * Returns every text captured by the first group of $regex_chapter.
     *
     * On failure the message "匹配失败!" is echoed and null is returned.
     *
     * @param string $content       Text to search.
     * @param string $regex_chapter PCRE pattern, delimiters included.
     * @return array|null List of captured strings, or null when nothing matched.
     */
    public function matchContentChapter($content, $regex_chapter)
    {
        if (preg_match_all($regex_chapter, $content, $matches) && isset($matches[1])) {
            return $matches[1];
        }

        echo "匹配失败!";
        return null;
    }
}
测试php文件
include 'CutPage.php';
/**
 * Small driver that fetches a page with CutPage and prints the title and
 * the chapter entries found in it.
 */
class Test
{
    /**
     * Fetches $url, then echoes the matched title followed by every matched chapter.
     *
     * @param string $url           Page to fetch.
     * @param string $regex_title   PCRE pattern whose first group captures the title.
     * @param string $regex_chapter PCRE pattern whose first group captures a chapter.
     * @return void
     */
    public static function start($url, $regex_title, $regex_chapter)
    {
        $cut = new CutPage();
        $all = $cut->getAllContent($url);

        $returnTitle = $cut->matchContentTitle($all, $regex_title);
        echo "题目:" . $returnTitle;

        $arrays = $cut->matchContentChapter($all, $regex_chapter);
        // matchContentChapter() returns no value when nothing matched, so
        // there is nothing to iterate over in that case.
        if (!is_array($arrays)) {
            return;
        }
        foreach ($arrays as $chapter) {
            echo $chapter;
        }
    }
}
// URL of the page that is fetched and scanned.
$url="http://www.quanben.com/xiaoshuo/12/12816/";
// Regular expressions
// NOTE(review): both patterns below open a character class with "[" that is
// never closed by "]", so PCRE will presumably refuse to compile them. They
// look as if the HTML tags they once contained were stripped when the code
// was published — TODO restore the intended patterns before running this.
$regex_title="/\s*[[(\S+)[[\s*/";
$regex_chapter="/\s*[](\W+)[\s*/";// chapter
Test::start($url, $regex_title, $regex_chapter);
//echo "cut".$return;
// Regular expression matching Chinese characters: ^[\u4E00-\u9FA5]+
这里构建了一个类似 Java 中 StringBuilder 的类
/**
 * Minimal string accumulator modelled on Java's StringBuilder.
 *
 * Pieces are collected in a list and joined only when toString() is called.
 * Every append* method returns $this so calls can be chained.
 */
class StringBuilder
{
    /** Line terminator added by appendLine(). */
    const LINE = "\n";

    /** @var array Pieces collected so far, in insertion order. */
    protected $list = array();

    /**
     * @param string|null $str Optional initial content.
     */
    public function __construct($str = null)
    {
        // Skip the default null so the list only ever holds real pieces.
        if ($str !== null) {
            $this->list[] = $str;
        }
    }

    /**
     * Appends $str unchanged.
     *
     * @param string $str
     * @return $this
     */
    public function append($str)
    {
        $this->list[] = $str;
        return $this;
    }

    /**
     * Appends $str followed by the LINE terminator.
     *
     * @param string $str
     * @return $this
     */
    public function appendLine($str)
    {
        $this->list[] = $str . self::LINE;
        return $this;
    }

    /**
     * Appends $str formatted with the given arguments (sprintf() syntax).
     *
     * Any number of arguments may follow the format string; a single
     * argument behaves as it did before.
     *
     * @param string $str     Format string.
     * @param mixed  ...$args Values substituted into the format string.
     * @return $this
     */
    public function appendFormat($str, ...$args)
    {
        $this->list[] = vsprintf($str, $args);
        return $this;
    }

    /**
     * Joins all collected pieces into one string.
     *
     * @return string
     */
    public function toString()
    {
        return implode("", $this->list);
    }

    /**
     * Drops the collected pieces when the object is destroyed.
     */
    public function __destruct()
    {
        unset($this->list);
    }
}