<?php
namespace Home\Controller;
use Think\Controller;
/**
 * Scrapes the thread list of a remote page and stores entries that are not
 * yet present in the `collection` model.
 */
class CollectionController extends Controller {
    /**
     * Page that is scraped. Links found on it are stored with this prefix
     * prepended to their href value.
     */
    const SOURCE_URL = 'http://fenxiang.banguanshui.com/';

    /**
     * Fetches SOURCE_URL, extracts every `<h3><a href=...>` link together with
     * the `<span class="username">` text at the same match position, and
     * inserts each link whose URL is not already stored.
     *
     * Prints a status message per link (and the new row id on success).
     * Prints an error and returns early when the page cannot be downloaded.
     *
     * @return void
     */
    public function Collection(){
        header("Content-Type: text/html;charset=utf-8");

        $curl = curl_init();
        curl_setopt($curl, CURLOPT_URL, self::SOURCE_URL);
        // Return the body as a string instead of printing it.
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
        // Without timeouts an unresponsive host blocks the request indefinitely.
        curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
        curl_setopt($curl, CURLOPT_TIMEOUT, 30);
        $data = curl_exec($curl);
        // Read the error text before the handle is closed.
        $curlError = curl_error($curl);
        curl_close($curl);

        if ($data === false) {
            echo '采集失败: ' . $curlError;
            return;
        }

        // Author names; match [2] is the text inside the span.
        preg_match_all('/<span class="username" (.*)>(.*)<\/span>/isU', $data, $authors, PREG_SET_ORDER);
        // Thread links; [1] is the href value, [2] the link text. Bounded
        // character classes keep a match inside one attribute / one tag so
        // extra quoted attributes or a second anchor on the same line cannot
        // be swallowed into the URL or the title.
        preg_match_all('/<h3><a href="([^"]*)" [^>]*>(.*?)<\/a>/', $data, $links, PREG_SET_ORDER);

        // The model is loop-invariant, so create it once.
        $collection = M('collection');

        foreach ($links as $index => $link) {
            $url = self::SOURCE_URL . $link[1];
            $title = $link[2];
            // Links and authors are paired purely by match position; there
            // may be fewer author matches than links.
            $author = isset($authors[$index][2]) ? $authors[$index][2] : null;

            $existing = $collection->where(array('url' => $url))->find();
            if (empty($existing) || $existing['url'] != $url) {
                $newId = $collection->add(array(
                    'url'    => $url,
                    'title'  => $title,
                    'author' => $author,
                ));
                if ($newId === false) {
                    // Insert failed; do not report success.
                    echo '添加失败';
                    continue;
                }
                echo '添加成功';
                dump($newId);
            } else {
                // URL already stored; skip it.
                echo '添加失败';
            }
        }
    }
}
// PHP website scraping
// (Latest recommended article published 2024-07-03 13:44:03)