php实现爬虫监测负面,php写了一个爬虫,求大家看看,运行时总有点问题,自身来看逻辑上没问题啊...

aee7c0b99fa019a633be84ac79bc76ea.png

悬赏园豆:20

[已关闭问题]

关闭于 2017-09-01 09:47

57e7d7b654833469823ca1a38534c098.png

<?php
class spider {
private $filename;
private $filename_list;

function __construct($filename='url.txt', $filename_list='url_already.txt') {
$this->filename = $filename;
$this->filename_list = $filename_list;
}

function net($num) {
  for($i=0; $i<$num; $i++) {
  $url = $this->fopen_one();
  $url_list = $this->fopen_list();

  if(in_array($url,$url_list)) {
    preg_match('/\.(.*)\./iU',$url,$mat);
    $url_name = $mat[1];
    $output = $this->get_content($url);
    $result = $this->get_url($output,$url,$url_name);
    $this->write_list($result);
    $this->write_one($url);
  } else {
    $this->one_delete($url);
  }
}
}


function get_content($url) {
  $ch = curl_init();
  curl_setopt($ch,CURLOPT_URL,$url);
  curl_setopt($ch,CURLOPT_RETURNTRANSFER,true);
  $output = curl_exec($ch);
  curl_close($ch);
  return $output;
}

function fopen_one() {
  $handle = fopen($this->filename,'r');
  $buffer = fgets($handle,4096);
  $url = trim($buffer);
  fclose($handle);
  return $url;
}

function fopen_list() {
  $handle = fopen($this->filename_list,'r');
  while (!feof($handle)) {
    $buffer = fgets($handle,4096);
    $url_list[] = trim($buffer);
  }
  fclose($handle) ;
  return $url_list;
}

function get_url($output,$url,$url_name) {
  $pattern = "/\<a.href=[\'\"].*[\'\"].*\>.*\<\/a\>/iU";
  preg_match_all($pattern,$output,$matches,PREG_SET_ORDER);
  if(is_array($matches)) {
    foreach($matches as $a) {
    $b = $a[0];
    if(preg_match("/\"(.*)\"/iU",$b,$c)) {
      $d = $c[1];
      if(preg_match("/^\/.*/i",$d,$e)) {
      $g = 'http://'.$url.$e[0];
      } else {
      $g = $d;
      }

    if(preg_match("/.*$url_name.*/",$g,$h)) {
    $result[] = trim($h[0]);
  }
  }
}
  $result = array_unique($result);
}
return $result;
}


function write_list($result) {
  $handle = fopen($this->filename,'a');
  foreach($result as $one) {
    $one = "\n".$one;
    fwrite($handle,$one);
  }
  fclose($handle);
}
function write_one($url) {
  $handle = fopen($this->filename_list,'a');
  $url = "\n".trim($url);
  fwrite($handle,$url);
  fclose($handle);
}
function one_delete($url) {
  $handle = fopen($this->filename,'r');
  while(!feof($handle)) {
    $buffer = fgets($handle,4096);
    $url_list[] = trim($buffer);
  }
  fclose($handle);
  $key = array_search($url,$url_list);
  unset($url_list[$key]);
  $handle = fopen($this->filename,'w');
  foreach($url_list as $k) {
    fwrite($handle,"\n".$k);
  }
  fclose($handle);
  }
}

5a3161f355757e8e05636ab84c3aeaf9.png

小龙鬼

|

初学一级

|

园豆:189

提问于:2017-08-29 09:28

显示帮助

使用"Ctrl+Enter"可进行快捷提交,评论支持部分 Markdown 语法:[link](http://example.com) _italic_ **bold** `code`。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值