php信息采集程序

最新推荐文章于 2024-10-10 12:45:21 发布

置顶 llx19861021366

最新推荐文章于 2024-10-10 12:45:21 发布

阅读量568

点赞数

分类专栏：语言专题 文章标签：php 正则表达式 fp class file url

本文链接：https://blog.csdn.net/llx19861021366/article/details/3998773

版权

语言专题专栏收录该内容

0 篇文章 0 订阅

订阅专栏

<?php
/*
 * Simple article scraper.
 *
 * Downloads the index page, finds every headline of the form
 *   <i class="titles"><a href="ARTICLE_URL" ...>TITLE</a></i>
 * and then downloads each linked article page and counts the anchor tags
 * found in it.
 *
 * Output (space separated, unchanged from the original script's intent):
 *   <number of headlines>
 *   then for each headline:
 *     <matched headline markup> <title text> <anchor count> <first anchor tag>
 *
 * Download failures are reported through error_log() and treated as an
 * empty page so that one bad URL does not abort the whole run.
 */

// Index page to scrape.
$url = "http://emotion.pclady.com.cn/skills/";

// Fetch the index page markup.
$rs = file_get_contents($url);
if ($rs === false) {
    error_log("Failed to download index page: " . $url);
    $rs = '';
}

// Headline pattern. Group 1 captures the href value, group 2 the title text.
// The title is matched lazily so that several headlines on one line are not
// merged into a single match. '/' is the delimiter, so literal slashes in
// the closing tags must be escaped.
$preg = '/<i\s+class="titles"><a\s+href="([^"]+)"[^>]*>(.*?)<\/a><\/i>/i';

// Collect all headlines; PREG_SET_ORDER yields one array per headline:
// [0] => full markup, [1] => article URL, [2] => title text.
$title = array();
if (preg_match_all($preg, $rs, $title, PREG_SET_ORDER) === false) {
    error_log("Headline pattern failed, PCRE error code " . preg_last_error());
    $title = array();
}

// Number of headlines found.
$count = count($title);
echo $count." ";

// Pattern for anchor tags inside an article page.
$pc = '/<a\s+href="[^>]+">/i';

// Visit each article page in turn.
foreach ($title as $entry) {
    // Article URL taken directly from the captured href attribute instead of
    // slicing the tag with fixed offsets.
    $curl = $entry[1];

    // Fetch the article page markup.
    $c = file_get_contents($curl);
    if ($c === false) {
        error_log("Failed to download article page: " . $curl);
        $c = '';
    }

    // Find every anchor tag in the article page; $content[0] is the list of
    // full matches.
    $content = array();
    if (preg_match_all($pc, $c, $content) === false) {
        error_log("Anchor pattern failed, PCRE error code " . preg_last_error());
        $content = array(array());
    }

    // Print the headline markup and its title text.
    echo $entry[0]." ";
    echo $entry[2]." ";

    // Print how many anchors were found and, if any, the first one.
    $concount = count($content[0]);
    echo $concount." ";
    if ($concount > 0) {
        echo $content[0][0];
    }
}
?>