PHP 笑话采集:ThinkPHP 使用 QueryList 采集笑话网站笔记

ThinkPHP 使用 QueryList 采集笑话网站的笔记:使用 ThinkPHP 的 command(自定义命令)写法采集笑话数据,不多说,直接上代码。

1.创建数据采集记录表

-- List table: one row per joke link discovered on the paginated index pages.
-- InnoDB gives transactional writes; utf8mb4 can store 4-byte characters
-- (e.g. emoji) that MySQL's 3-byte `utf8` cannot.
CREATE TABLE `joke_list` (
  `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
  `category` varchar(255) DEFAULT NULL,
  `title` varchar(255) DEFAULT NULL,
  `link` varchar(255) DEFAULT NULL,
  -- 1 = detail page not scraped yet; the command sets 2 once it has been scraped
  `status` tinyint(4) DEFAULT '1',
  `create_time` datetime DEFAULT NULL,
  `update_time` datetime DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

-- Detail table: one row per scraped joke page.
-- InnoDB gives transactional writes; utf8mb4 can store 4-byte characters
-- (e.g. emoji) that MySQL's 3-byte `utf8` cannot.
CREATE TABLE `joke_detail` (
  `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
  `category` varchar(255) DEFAULT NULL,
  `title` varchar(255) DEFAULT NULL,
  `tag` varchar(255) DEFAULT NULL,
  `description` varchar(2000) DEFAULT NULL,
  -- HTML of the article body, stored entity-encoded by the scraper
  `content` text,
  `link` varchar(255) DEFAULT NULL,
  -- the scraper writes 2 for every row it stores
  `status` tinyint(4) DEFAULT '1',
  `create_time` datetime DEFAULT NULL,
  `update_time` datetime DEFAULT NULL,
  PRIMARY KEY (`id`),
  -- The scraper looks rows up by link once per joke; the 191-character prefix
  -- keeps the key within the 767-byte limit of older InnoDB row formats.
  KEY `idx_link` (`link`(191))
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

2.编写数据库模型

JokeDetail.php

<?php

declare(strict_types=1);

namespace app\model;

use think\Model;

/**
 * Model for scraped joke detail records.
 *
 * Presumably mapped to the `joke_detail` table through ThinkPHP's
 * class-name-to-table convention (no explicit table name is set here).
 *
 * @mixin \think\Model
 */
class JokeDetail extends Model
{
    /**
     * Enables ThinkPHP's automatic timestamp writing in `datetime` format,
     * matching the `create_time` / `update_time` datetime columns.
     */
    protected $autoWriteTimestamp = 'datetime';
}

JokeList.php

<?php

declare(strict_types=1);

namespace app\model;

use think\Model;

/**
 * Model for joke links collected from the paginated index pages.
 *
 * Presumably mapped to the `joke_list` table through ThinkPHP's
 * class-name-to-table convention (no explicit table name is set here).
 *
 * @mixin \think\Model
 */
class JokeList extends Model
{
    /**
     * Enables ThinkPHP's automatic timestamp writing in `datetime` format,
     * matching the `create_time` / `update_time` datetime columns.
     */
    protected $autoWriteTimestamp = 'datetime';
}

3.安装querylist框架

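执行 composer 安装命令:

composer require jaeger/querylist

采集脚本中还用到了 AbsoluteUrl 插件(QL\Ext\AbsoluteUrl),该插件需要单独安装:

composer require jaeger/querylist-absolute-url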

参考安装文档

4.创建commands编写采集脚本代码(app/command/Joke.php)

<?php

declare (strict_types=1);

namespace app\command;

use app\model\JokeDetail;

use app\model\JokeList;

use think\console\Command;

use think\console\Input;

use think\console\Output;

use QL\QueryList;

use QL\Ext\AbsoluteUrl;

use GuzzleHttp\Exception\RequestException;

use think\facade\Log;

/**
 * Console command that scrapes a joke site in two passes:
 * task() collects joke links from the paginated index into JokeList, then
 * task2() fetches every pending link and stores the page in JokeDetail.
 */
class Joke extends Command
{
    /** Site root; also the base used to make scraped links absolute. */
    private const BASE_URL = 'https://xiaohua.bbwx.com/';

    /** Number of the last index page to scrape. */
    private const LAST_PAGE = 146;

    /** How many times a single HTTP request is attempted before giving up. */
    private const MAX_ATTEMPTS = 3;

    /** Extra request options handed to QueryList's HTTP client. */
    private const REQUEST_OPTIONS = ['timeout' => 30];

    /**
     * Declares the command name and description.
     *
     * NOTE(review): the name set here is 'caiji', while config/console.php
     * registers this class under the key 'joke' and the article runs
     * `think joke`. Confirm which name the framework uses and align them.
     */
    protected function configure()
    {
        $this->setName('caiji')
            ->setDescription('the caiji command');
    }

    /**
     * Runs both scraping passes in order: index pages first, then details.
     *
     * @param Input  $input  console input (unused)
     * @param Output $output console output (unused)
     */
    protected function execute(Input $input, Output $output)
    {
        $this->task();
        $this->task2();
    }

    /**
     * Pass 1: walks index pages 1..LAST_PAGE and saves every joke's
     * category, title and absolute link into JokeList.
     *
     * A page whose request keeps failing is logged and skipped after
     * MAX_ATTEMPTS tries, so one dead page can no longer hang the command.
     *
     * NOTE(review): rows are inserted unconditionally, so running the command
     * twice presumably stores the same links twice; confirm whether
     * de-duplication on `link` is wanted.
     */
    protected function task()
    {
        $ql = QueryList::getInstance();
        $ql->use(AbsoluteUrl::class);

        $rules = [
            'category' => ['header>a', 'text'],
            'link' => ['h2 a', 'href'],
            'title' => ['h2 a', 'text'],
        ];
        // Range selector: each matched element yields one row of $rules.
        $range = '.content .excerpt-text';

        for ($page = 1; $page <= self::LAST_PAGE; $page++) {
            Log::info("开始采集第{$page}页数据!");

            $rows = $this->requestWithRetry(
                function () use ($ql, $page, $rules, $range): array {
                    return $ql->get(self::BASE_URL . "page/{$page}", null, self::REQUEST_OPTIONS)
                        ->absoluteUrl(self::BASE_URL)
                        ->rules($rules)
                        ->range($range)
                        ->query()
                        ->getData()
                        ->all();
                },
                "第{$page}页数据"
            );

            if ($rows === null) {
                Log::info("第{$page}页数据采集失败,已跳过!");
                continue;
            }

            if ($rows) {
                (new JokeList())->saveAll($rows);
            }
        }
    }

    /**
     * Pass 2: for every JokeList row with status 1, fetches the joke page,
     * stores (or updates, matched by link) the JokeDetail row and marks the
     * list row as done (status 2).
     *
     * A row whose request keeps failing stays at status 1, so the next run
     * of the command picks it up again.
     */
    protected function task2()
    {
        $ql = QueryList::getInstance();
        $pending = JokeList::where('status', 1)->select();

        foreach ($pending as $joke) {
            // Nothing to fetch without a link; leave the row untouched.
            if ($joke->link === null || $joke->link === '') {
                continue;
            }

            $detail = $this->requestWithRetry(
                function () use ($ql, $joke): array {
                    $page = $ql->get($joke->link, null, self::REQUEST_OPTIONS);
                    // Strip the `.asb` blocks from the article body before reading it.
                    $page->find('.content .article-content .asb')->remove();

                    return [
                        'category' => $joke->category,
                        'title' => $page->find('.article-title')->text(),
                        'tag' => $page->find('.article-tags>a')->text(),
                        'description' => $page->find("meta[name='description']")->attr('content'),
                        // Cast guards htmlentities() against a non-string result
                        // (presumably null when the selector matches nothing),
                        // which would be a TypeError under strict_types.
                        'content' => htmlentities((string) $page->find('.content .article-content')->html()),
                        'link' => $joke->link,
                        'status' => 2,
                    ];
                },
                "详情页{$joke->link}"
            );

            if ($detail === null) {
                Log::info("请求失败{$joke->link},已跳过,下次执行时重新采集!");
                continue;
            }

            JokeDetail::where('link', $joke->link)->findOrEmpty()->save($detail);

            $joke->status = 2;
            $joke->save();
        }
    }

    /**
     * Runs $request up to MAX_ATTEMPTS times, retrying on HTTP failures.
     *
     * TransferException is caught rather than RequestException because it is
     * the common parent in both Guzzle 6 and 7; in Guzzle 7 timeouts and
     * connection errors (ConnectException) no longer extend RequestException.
     *
     * @param callable $request closure performing the request; must return an array
     * @param string   $what    label of the request, used in log messages
     *
     * @return array|null the closure's result, or null when every attempt failed
     */
    private function requestWithRetry(callable $request, string $what): ?array
    {
        for ($attempt = 1; $attempt <= self::MAX_ATTEMPTS; $attempt++) {
            try {
                return $request();
            } catch (\GuzzleHttp\Exception\TransferException $e) {
                Log::info(sprintf(
                    '%s请求失败(第%d/%d次):%s',
                    $what,
                    $attempt,
                    self::MAX_ATTEMPTS,
                    $e->getMessage()
                ));

                // Brief pause so a struggling server is not hit again at once.
                if ($attempt < self::MAX_ATTEMPTS) {
                    sleep(1);
                }
            }
        }

        return null;
    }
}

5.注册该脚本命令:编辑 config/console.php

<?php

// +----------------------------------------------------------------------
// | Console configuration
// +----------------------------------------------------------------------
return [
    // Command definitions: command name => command class.
    // The article invokes this entry as `think joke`.
    'commands' => [
        'joke' => 'app\command\Joke',
    ],
];

6.执行命令开始采集,等待采集完成

php think joke

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值