<?php
defined('BASEPATH') OR exit('No direct script access allowed');

/**
 * Controller that scrapes article teasers from the TechWeb front page into
 * the `techweb` table, serves a short-lived static copy of the listing, and
 * offers a paginated title search with per-term hit counting in Memcache.
 *
 * `$this->db->conn_id` is used as a PDO connection throughout.
 */
class Welcome extends CI_Controller
{
    /**
     * Scrape the front page and insert every matched article into `techweb`.
     *
     * Echoes a redirecting success script when at least one row was written,
     * otherwise echoes the failure message.
     */
    public function index()
    {
        $this->load->library('Curl');

        $url = "http://www.techweb.com.cn/";
        $preg = '#<div class="photo3">(.*)<a href="(.*)" target="_blank">(.*)<img (.*) src="(.*)">(.*)</a>(.*)</div>(.*)<h4><a title="(.*)" href="(.*)" target="_blank">(.*)</a></h4>(.*)<p>(.*)<a href="(.*)" target="_blank">(.*)</a>(.*)</p>#isU';
        $arr = $this->curl->get_info($url, $preg);

        // Capture groups of interest; each entry is a list with one item per matched article.
        $info = array(
            'title'   => isset($arr[9]) ? $arr[9] : array(),   // title
            'href'    => isset($arr[14]) ? $arr[14] : array(), // hyperlink
            'img'     => isset($arr[5]) ? $arr[5] : array(),   // image URL
            'content' => isset($arr[13]) ? $arr[13] : array(), // summary text
        );

        // Nothing matched (or the download failed): an INSERT without any
        // value tuples would be invalid SQL, so report failure right away.
        if (empty($info['title'])) {
            echo "添加失败";
            return;
        }

        // Save the images locally; the returned list holds the local paths.
        $path = './public/img/';
        $info['img'] = $this->curl->get_img($info['img'], $path);

        // One "(?,?,?,?)" tuple per article; the values are bound rather than
        // escaped by hand (mysql_escape_string no longer exists in PHP 7+).
        $placeholders = array();
        $values = array();
        foreach ($info['title'] as $k => $v) {
            $placeholders[] = '(?,?,?,?)';
            $values[] = $v;
            $values[] = $info['href'][$k];
            $values[] = $info['img'][$k];
            $values[] = $info['content'][$k];
        }
        $sql = "insert into techweb (title,href,img,content) VALUES " . implode(',', $placeholders);

        $stmt = $this->db->conn_id->prepare($sql);
        $res = ($stmt !== false) && $stmt->execute($values) && $stmt->rowCount() > 0;

        if ($res) {
            echo "<script>alert('添加成功');location.href='" . site_url('Welcome/show') . "'</script>";
        } else {
            echo "添加失败";
        }
    }

    /**
     * Show all rows of `techweb`.
     *
     * A rendered copy is kept in show_cat.html and served as-is while it is
     * younger than 60 seconds; otherwise the page is rendered from the
     * database and the copy is refreshed.
     */
    public function show()
    {
        $file_name = "./application/views/show_cat.html";

        if (file_exists($file_name) && (time() - filemtime($file_name) < 60)) {
            echo "静态页面";
            echo file_get_contents($file_name);
            return;
        }

        $stmt = $this->db->conn_id->prepare("select * from techweb");
        $stmt->execute();
        $arr = $stmt->fetchAll(PDO::FETCH_ASSOC);

        // Open a buffer to capture the rendered view.
        ob_start();
        $this->load->vars('arr', $arr);
        $this->load->view('show.html');
        // Fetch the captured markup AND close the buffer. ob_clean() only
        // emptied the buffer and left it open for the rest of the request.
        $ob = ob_get_clean();

        file_put_contents($file_name, $ob);
        echo "动态读取";
        echo $ob;
    }

    /**
     * Paginated title search ("pagination plus pseudo-static page").
     *
     * Reads the `search` and `page` GET parameters, bumps the per-term hit
     * counter stored under the Memcache key "search", queries one page of
     * matching rows, writes them to page.xml and renders search.html.
     */
    public function search()
    {
        $search = (string) $this->input->get('search');

        // Count how often each term has been searched.
        $memcache = new Memcache();
        $memcache->connect("127.0.0.1", 11211);
        $counts = $memcache->get('search');
        if (!is_array($counts)) {
            $counts = array();
        }
        $counts[$search] = isset($counts[$search]) ? $counts[$search] + 1 : 1;
        $memcache->set("search", $counts);

        // Both queries go through the framework connection; the separate PDO
        // instance with credentials hard-coded in the controller is gone.
        $conn = $this->db->conn_id;

        // An empty search means "no filter". The old code substituted the
        // literal 1, which only matched titles containing the character "1".
        if ($search !== '') {
            $where_sql = " WHERE title LIKE ?";
            $bind = array('%' . $search . '%');
        } else {
            $where_sql = '';
            $bind = array();
        }

        // Total number of matching rows, counted by the database.
        $count_stmt = $conn->prepare("select count(*) from techweb" . $where_sql);
        $count_stmt->execute($bind);
        $count = (int) $count_stmt->fetchColumn();

        // Rows per page.
        $page_show = 3;
        // Total pages (0 when nothing matches).
        $page_num = (int) ceil($count / $page_show);
        $last_page = max(1, $page_num);
        // Current page, forced to an integer inside [1, last page].
        $page = min(max(1, (int) $this->input->get('page')), $last_page);
        // Offset of the first row on this page.
        $limit = ($page - 1) * $page_show;
        // Previous / next page, kept inside the valid range.
        $prev = max($page - 1, 1);
        $next = min($page + 1, $last_page);

        // LIMIT values are integers computed above, so they are formatted in
        // directly; binding them as strings breaks with emulated prepares.
        $data_stmt = $conn->prepare(
            "select * from techweb" . $where_sql . sprintf(" limit %d,%d", $limit, $page_show)
        );
        $data_stmt->execute($bind);
        $goods_data = $data_stmt->fetchAll(PDO::FETCH_ASSOC);

        // Write the current page to page.xml: one <message id="..."> element
        // per row. The file is not keyed by search term or page, so it cannot
        // stand in for the query. Previously, once it existed, the method
        // loaded it and rendered nothing at all.
        $file_name = "./application/views/page.xml";
        $xml = new SimpleXMLElement('<Messages></Messages>');
        foreach ($goods_data as $i => $row) {
            $xml->message[$i] = '';
            $xml->message[$i]['id'] = $row['id'];
            $xml->message[$i]->title = $row['title'];
            $xml->message[$i]->href = $row['href'];
            $xml->message[$i]->img = $row['img'];
            $xml->message[$i]->content = $row['content'];
        }
        $xml->asXML($file_name);

        $this->load->vars(array(
            'search_data' => $memcache->get("search"),
            'page'        => $page,
            'prev'        => $prev,
            'next'        => $next,
            'page_num'    => $page_num,
            'search'      => $search,
            'arr'         => $goods_data,
        ));
        $this->load->view('search.html');
    }
}

// ---------------------------------------------------------------------------
// CURL.php
// In the original listing the class below is a second file; it is the library
// that Welcome::index() loads with $this->load->library('Curl').
// ---------------------------------------------------------------------------

/**
 * Small cURL wrapper for scraping pages, submitting login forms and
 * downloading images.
 */
class Curl
{
    /** @var resource|object cURL handle reused by every request of this instance. */
    public $curl;

    /**
     * Constructor: create the cURL handle.
     */
    public function __construct()
    {
        $this->curl = curl_init();
    }

    /**
     * Fetch a URL with GET.
     *
     * @param string      $url  Address to download.
     * @param string|null $preg Optional PCRE pattern applied with preg_match_all().
     * @return array|string|false Without a pattern: the response body, or false
     *                            when the transfer failed. With a pattern: the
     *                            capture groups (index 1..n, each a list of
     *                            matches); the full-match entry 0 is removed.
     */
    public function get_info($url, $preg = null)
    {
        $params = array(
            CURLOPT_URL            => $url,
            CURLOPT_HEADER         => false,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_USERAGENT      => 'Mozilla/5.0 (Windows NT 5.1; rv:9.0.1) Gecko/20100101 Firefox/9.0.1',
            // Force GET. The old empty CURLOPT_POSTFIELDS turned the request
            // into a POST, and the shared handle would otherwise stay in POST
            // mode after login_get_info().
            CURLOPT_HTTPGET        => true,
        );
        curl_setopt_array($this->curl, $params);
        $content = curl_exec($this->curl);

        if (empty($preg)) {
            return $content;
        }

        // A failed transfer yields false; match against '' so the caller
        // still receives one (empty) list per capture group.
        preg_match_all($preg, (string) $content, $arr);
        unset($arr[0]);

        return $arr;
    }

    /**
     * Submit a login form with POST and return the resulting page.
     *
     * @param array  $data Form fields required for the login (name => value).
     * @param string $url  Address the form is submitted to.
     * @return string|false Response body, or false when the transfer failed.
     */
    public function login_get_info($data, $url)
    {
        $pairs = array();
        foreach ($data as $key => $value) {
            $pairs[] = urlencode($key) . '=' . urlencode($value);
        }

        $params = array(
            CURLOPT_URL            => $url,
            CURLOPT_HEADER         => false,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_USERAGENT      => 'Mozilla/5.0 (Windows NT 5.1; rv:9.0.1) Gecko/20100101 Firefox/9.0.1',
            CURLOPT_POST           => true,
            CURLOPT_POSTFIELDS     => implode('&', $pairs),
        );

        $this->cookies();
        curl_setopt_array($this->curl, $params);

        return curl_exec($this->curl);
    }

    /**
     * Attach cookie storage to the handle.
     *
     * Reuses the jar named by the `cookie_jar` browser cookie when it points
     * at an existing file directly inside the cookie directory; otherwise a
     * new jar is created and its path is sent back in that cookie.
     *
     * The options are applied to the handle itself. The previous version
     * stored them in a local array that was thrown away, so cookies were
     * never used.
     *
     * NOTE(review): the jar path still travels through a client-controlled
     * cookie and reveals a server path; consider keeping it in the session.
     */
    private function cookies()
    {
        $cookie_path = './';
        $jar_dir = realpath($cookie_path);

        $requested = false;
        if (isset($_COOKIE['cookie_jar']) && is_string($_COOKIE['cookie_jar']) && $_COOKIE['cookie_jar'] !== '') {
            $requested = realpath($_COOKIE['cookie_jar']);
        }

        // Accept the client-supplied path only if it resolves to a regular
        // file that lives directly in the cookie directory.
        $is_known_jar = $requested !== false
            && $jar_dir !== false
            && is_file($requested)
            && dirname($requested) === $jar_dir;

        if ($is_known_jar) {
            curl_setopt($this->curl, CURLOPT_COOKIEFILE, $requested);
            return;
        }

        $cookie_jar = tempnam($cookie_path, 'cookie');
        if ($cookie_jar === false) {
            return;
        }
        curl_setopt($this->curl, CURLOPT_COOKIEJAR, $cookie_jar);
        setcookie('cookie_jar', $cookie_jar);
    }

    /**
     * Download remote images.
     *
     * @param array  $img       List of image URLs.
     * @param string $save_path Local directory (with trailing slash) to save into.
     * @return array Same keys as $img; each value is the local file path, or
     *               '' when that image could not be downloaded or saved.
     */
    public function get_img($img, $save_path)
    {
        if (!is_dir($save_path)) {
            mkdir($save_path, 0755, true);
        }

        foreach ($img as $i => $src) {
            // '@' keeps an unreachable image from emitting a warning; the
            // failure itself is handled explicitly just below.
            $res = @file_get_contents($src);
            if ($res === false) {
                $img[$i] = '';
                continue;
            }

            // Take the extension from the URL path only, so a query string or
            // a URL without any dot cannot leak into the file name.
            $url_path = (string) parse_url($src, PHP_URL_PATH);
            $ext = preg_replace('/[^A-Za-z0-9]/', '', pathinfo($url_path, PATHINFO_EXTENSION));
            $img_type = ($ext !== '') ? '.' . $ext : '';

            $path = $save_path . time() . rand(1, 9999999) . mt_rand() . $img_type;
            $img[$i] = (file_put_contents($path, $res) !== false) ? $path : '';
        }

        return $img;
    }
}
CURL采集
最新推荐文章于 2022-04-25 00:13:45 发布