<?php
require_once 'src/Core.php';
require_once 'src/Base.php';
require_once 'src/Exception.php';
use Ares333\CurlMulti\Core;
use Ares333\CurlMulti\Base;
use Ares333\CurlMulti\Exception;
// Database connection.
// The charset is declared in the DSN so the client library and the server
// agree on it; the explicit "set names" is kept for PHP builds that ignore
// the DSN charset option.
// NOTE(review): credentials are hard-coded here — consider moving them to
// configuration outside the source tree.
$db=new PDO('mysql:dbname=dbname;host=localhost;charset=utf8','root','123456');
// Report SQL errors as exceptions instead of silent false return values.
$db->setAttribute(PDO::ATTR_ERRMODE,PDO::ERRMODE_EXCEPTION);
$db->exec("set names utf8");
// curl settings
$curl = new Core();
$curl->cbInfo =array(new Base(),'cbCurlInfo');// status display callback
$curl->maxThread = 10;// number of concurrent threads
$curl->taskPoolType = 'queue';// process tasks as a queue
// Task producer callback and its argument (the table to read URLs from);
// change the task function according to the task level.
$curl->cbTask = array('task_gather_common','tbname123');
$curl->start();// start processing tasks
//通用采集
/**
 * Generic task producer: reads the next batch of rows (id, url) from the
 * given table and queues a download for every URL whose local HTML file
 * does not exist yet.
 *
 * The position in the table is remembered between calls in a static
 * cursor ($lastId), so each call continues after the last row examined.
 * Uses the global $db (PDO connection) and $curl (task queue).
 *
 * @param string $tbname Table to read from; must be a plain identifier,
 *                       optionally prefixed with a schema ("db.table").
 * @return void
 * @throws \InvalidArgumentException When $tbname is not a valid identifier.
 */
function task_gather_common($tbname){
    global $db,$curl;
    static $lastId=0;
    $limit=10;

    // Identifiers cannot be bound as statement parameters, so the table name
    // is validated against a strict pattern before being placed in the SQL.
    if(!is_string($tbname) || !preg_match('/^[A-Za-z0-9_$]+(\.[A-Za-z0-9_$]+)?$/',$tbname)){
        throw new \InvalidArgumentException('Invalid table name');
    }
    $quotedTable='`'.str_replace('.','`.`',$tbname).'`';

    $stmt=$db->prepare("SELECT id,url FROM $quotedTable WHERE id>:lastId ORDER BY id LIMIT $limit");
    if($stmt===false){
        // Only reachable when PDO is not in exception mode.
        trigger_error("Unable to prepare task query for table $tbname",E_USER_WARNING);
        return;
    }

    // Keep reading batches until at least one task has been queued or the
    // table is exhausted. Otherwise a batch made up entirely of pages that
    // are already on disk would queue nothing while rows still remain.
    do{
        $queued=0;
        $stmt->bindValue(':lastId',(int)$lastId,PDO::PARAM_INT);
        if(!$stmt->execute()){
            trigger_error("Unable to read tasks from table $tbname",E_USER_WARNING);
            return;
        }
        $rows=$stmt->fetchAll(PDO::FETCH_ASSOC);
        foreach($rows as $v){
            // Advance the cursor for every row, queued or skipped, so that
            // skipped rows are not fetched again on the next call.
            $lastId=$v['id'];
            $localpath=GetLocalhtmlpath($v['url']);
            if(file_exists($localpath)){
                continue;
            }
            $urlarry=array(
                'url' => $v['url'],
                'args' => array(
                    'id' => $v['id'],
                    'localpath' => $localpath
                ),
                'opt'=>array(
                    // NOTE(review): certificate verification is disabled,
                    // which allows man-in-the-middle tampering of HTTPS pages.
                    CURLOPT_SSL_VERIFYPEER=>false
                )
            );
            $curl->add($urlarry,'task_callback');
            $queued++;
        }
    }while($rows && $queued===0);
}
/**
 * Download-finished callback: stores the response body on disk when the
 * request returned HTTP 200; any other status is ignored.
 *
 * The body is written to a temporary sibling file ("<localpath>.tmp") and
 * then renamed into place. The existence of the final file is what marks a
 * page as downloaded, so a partially written file must never appear under
 * the final name.
 *
 * @param array $r    Response data; reads $r['info']['http_code'] and
 *                    $r['content'].
 * @param array $args Task arguments; reads $args['localpath'].
 * @return void
 */
function task_callback($r,$args){
    if($r['info']['http_code']!=200){
        return;
    }
    $localpath=$args["localpath"];
    $tmppath=$localpath.'.tmp';
    if(file_put_contents($tmppath,$r['content'])===false || !rename($tmppath,$localpath)){
        // Do not leave a half-written temporary file behind.
        if(file_exists($tmppath)){
            unlink($tmppath);
        }
        trigger_error("Unable to save page to $localpath",E_USER_WARNING);
    }
}
/**
 * Builds the local file path under which the HTML of a URL is stored and
 * makes sure its directory exists.
 *
 * The file name is the MD5 hash of the URL plus ".html"; files are spread
 * over sub-directories named after the first three characters of the hash.
 *
 * @param string $url     URL of the page.
 * @param string $baseDir Root directory of the page store; a trailing slash
 *                        is optional.
 * @return string Full path of the HTML file for $url.
 */
function GetLocalhtmlpath($url,$baseDir='D:/gather_html/partsimg_gather/')
{
    $md5str=md5($url);
    $dir=rtrim($baseDir,'/\\').'/'.substr($md5str,0,3).'/';
    if(!mkDirs($dir)){
        // The path is still returned; writing to it will fail later, so make
        // the root cause visible here.
        trigger_error("Unable to create directory $dir",E_USER_WARNING);
    }
    return $dir.$md5str.'.html';
}
/**
 * Creates a directory together with any missing parent directories.
 *
 * @param string $dir Directory path to create.
 * @return bool True when the directory exists after the call, false when it
 *              could not be created or $dir is empty.
 */
function mkDirs($dir){
    // An empty path can never be created; reject it up front.
    if($dir===null || $dir===''){
        return false;
    }
    if(is_dir($dir)){
        return true;
    }
    // The third argument makes mkdir() create missing parents itself. The
    // is_dir() re-check covers the case where the directory appeared between
    // the check above and the mkdir() call.
    return mkdir($dir,0777,true) || is_dir($dir);
}
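// php curl采集案例 (PHP curl scraping example)
// 最新推荐文章于 2022-04-25 00:13:45 发布 (blog page note: latest recommended article published 2022-04-25 00:13:45)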