转载请注明: 藏羚骸的博客~hyperf协程免费查询快递物流.
小伙伴们可以先看《免费查询快递物流》这篇文章,其中详细介绍了用 PHP 常规方式爬取物流信息的思路。本文在上一篇文章的基础上加入了协程,属于升级版。之前还写过一篇《hyperf协程大批量匹配快递物流信息》的文章,那篇文章也说明了协程的执行速度相比传统方式有质的飞跃。这里我们用 hyperf 协程实现免费查询快递物流。
php7.2+swoole4+hyperf2.0
Hyperf\config\autoload\server.php配置为
<?php
declare(strict_types=1);
/**
* This file is part of Hyperf.
*
* @link https://www.hyperf.io
* @document https://hyperf.wiki
* @contact group@hyperf.io
* @license https://github.com/hyperf/hyperf/blob/master/LICENSE
*/
use Hyperf\Server\Server;
use Hyperf\Server\SwooleEvent;
// Hyperf server configuration: one HTTP server plus the Swoole settings and
// lifecycle callbacks used by the express-tracking example.
return [
'mode' => SWOOLE_PROCESS,
'servers' => [
[
// HTTP server listening on every interface, port 8080.
'name' => 'http',
'type' => Server::SERVER_HTTP,
'host' => '0.0.0.0',
'port' => 8080,
'sock_type' => SWOOLE_SOCK_TCP,
'callbacks' => [
SwooleEvent::ON_REQUEST => [Hyperf\HttpServer\Server::class, 'onRequest'],
],
],
],
'settings' => [
'enable_coroutine' => true,
// Worker count comes from swoole_cpu_num().
'worker_num' => swoole_cpu_num(),
'pid_file' => BASE_PATH . '/runtime/hyperf.pid',
'open_tcp_nodelay' => true,
'max_coroutine' => 100000,
'open_http2_protocol' => true,
'max_request' => 100000,
// Both buffers are set to 2 MiB.
'socket_buffer_size' => 2 * 1024 * 1024,
'buffer_output_size' => 2 * 1024 * 1024,
// SWOOLE_HOOK_CURL is OR-ed in explicitly; the controller in this article
// issues its requests through the curl_* functions.
// NOTE(review): presumably SWOOLE_HOOK_ALL does not include the cURL hook on
// the Swoole version in use - confirm before simplifying this expression.
'hook_flags' => SWOOLE_HOOK_ALL | SWOOLE_HOOK_CURL,
// NOTE(review): daemonize presumably detaches the server from the terminal,
// so anything written with echo would not be visible - confirm.
'daemonize' => true,
],
'callbacks' => [
SwooleEvent::ON_WORKER_START => [Hyperf\Framework\Bootstrap\WorkerStartCallback::class, 'onWorkerStart'],
SwooleEvent::ON_PIPE_MESSAGE => [Hyperf\Framework\Bootstrap\PipeMessageCallback::class, 'onPipeMessage'],
SwooleEvent::ON_WORKER_EXIT => [Hyperf\Framework\Bootstrap\WorkerExitCallback::class, 'onWorkerExit'],
],
];
Hyperf\config\routes.php 配置为
// Route GET /getkdstatus2 to KuaidiController::getkdstatus2 (the batch status lookup).
Router::get('/getkdstatus2', 'App\Controller\KuaidiController::getkdstatus2');
Hyperf\app\Controller\KuaidiController.php
<?php
declare(strict_types=1);
/**
* This file is part of Hyperf.
*
* @link https://www.hyperf.io
* @document https://hyperf.wiki
* @contact group@hyperf.io
* @license https://github.com/hyperf/hyperf/blob/master/LICENSE
*/
namespace App\Controller;
use QL\QueryList;
use Hyperf\DbConnection\Db;
use Hyperf\HttpServer\Contract\RequestInterface;
class KuaidiController extends AbstractController
{
    /** Number of orders (and therefore coroutines) handled per batch. */
    private const BATCH_SIZE = 100;

    /** Upper bound, in seconds, for one outbound HTTP request. */
    private const HTTP_TIMEOUT = 10;

    /**
     * Look up the tracking status of every matching order and store it.
     *
     * Selects rows of `yunorders` with status = 3 whose `current` column equals
     * the request parameter `current` (default '未获取'), then queries the
     * Baidu express API for each row inside its own coroutine and writes the
     * returned status back to the row.
     *
     * The rows are fetched once and split into batches of BATCH_SIZE. Each
     * batch is awaited with a WaitGroup before the next one starts; starting
     * thousands of coroutines at once exhausted the memory limit
     * ("Allowed memory size of 268435456 bytes exhausted").
     *
     * @param RequestInterface $request incoming HTTP request
     * @return string always 'ok'
     * @throws \RuntimeException when the API token cannot be obtained
     */
    public function getkdstatus2(RequestInterface $request)
    {
        $current = $request->input('current', '未获取');

        // Fetch the work list once, in a stable order. Paging with OFFSET while
        // the same rows are being updated (and without ORDER BY) can skip rows,
        // and the filter must be the same one the caller asked for.
        $orders = Db::table('yunorders')
            ->where('status', 3)
            ->where('current', $current)
            ->orderBy('id')
            ->get(['id', 'expressCode']);

        foreach (array_chunk($orders->all(), self::BATCH_SIZE) as $batch) {
            // A token is fetched per batch, as the original code did.
            $info = $this->getinfo();

            $wg = new \Hyperf\Utils\WaitGroup();
            $wg->add(count($batch));

            foreach ($batch as $row) {
                // Rows may be stdClass objects or arrays; normalise to array.
                $order = (array) $row;

                co(function () use ($order, $info, $wg) {
                    try {
                        $status = $this->fetchCurrentStatus(
                            (string) $order['expressCode'],
                            (string) $info['tokenV2']
                        );

                        // Leave the row untouched when the lookup failed so
                        // that it can be picked up again by a later run.
                        if ($status === null) {
                            return;
                        }

                        Db::table('yunorders')
                            ->where('id', $order['id'])
                            ->update([
                                'updated_at' => date("Y-m-d H:i:s"),
                                'current' => $status,
                            ]);
                    } finally {
                        // Must run even when the lookup or the update throws,
                        // otherwise wait() below would block forever.
                        $wg->done();
                    }
                });
            }

            // Block until every coroutine of this batch has finished.
            $wg->wait();
        }

        return 'ok';
    }

    /**
     * Build a random string from the characters A-Z, a-z and 0-9.
     *
     * The alphabet is shuffled and its first $length characters are returned,
     * so no character repeats and the result is at most 62 characters long.
     *
     * @param int $length requested length
     * @return string the random string
     */
    public function getrandstr($length)
    {
        $alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890';

        return substr(str_shuffle($alphabet), 0, $length);
    }

    /**
     * Fetch the Baidu search result page for "快递" and extract the tokenV2
     * value that the express API requires.
     *
     * @return array array with the single key 'tokenV2'
     * @throws \RuntimeException when the page cannot be fetched or contains no token
     */
    public function getinfo()
    {
        // tn names the site the search request originates from; the original
        // author notes that results are unstable without it.
        $url = 'https://www.baidu.com/s?tn=02003390_43_hao_pg&isource=infinity&&ie=utf-8&wd=%E5%BF%AB%E9%80%92';
        $header = [
            "Host:www.baidu.com",
            "Content-Type:application/x-www-form-urlencoded",
            "Connection: keep-alive",
            'Referer:http://www.baidu.com',
            // The User-Agent matters: per the original author, a poorly
            // simulated one leads to a verification page instead of results.
            'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36',
        ];

        // Response headers are included in the returned text, as before.
        $content = $this->httpGet($url, $header, true);
        if ($content === null) {
            throw new \RuntimeException('Unable to fetch the Baidu page that carries tokenV2.');
        }

        // Extract tokenV2; fail loudly instead of continuing with an empty token.
        if (preg_match('/tokenV2=(.*?)"/i', $content, $match) !== 1) {
            throw new \RuntimeException('tokenV2 was not found in the Baidu response.');
        }

        return ['tokenV2' => $match[1]];
    }

    /**
     * Query the Baidu express API for one tracking number.
     *
     * @param string $expressCode tracking number
     * @param string $tokenV2 token obtained from getinfo()
     * @return mixed value of data.info.current in the response, or null when
     *               the request failed or the response lacks that field
     */
    private function fetchCurrentStatus(string $expressCode, string $tokenV2)
    {
        // Only the tracking number is encoded; the token is used exactly as it
        // was extracted from the page, as in the original request.
        $url = 'https://express.baidu.com/express/api/express?tokenV2=' . $tokenV2
            . '&nu=' . rawurlencode($expressCode);

        // A fresh pseudo-random BAIDUID cookie is generated for each request.
        $baiduid = ucfirst(md5($this->getrandstr(6) . rand(10000, 99999)));
        $header = [
            "Host:express.baidu.com",
            "Content-Type:application/x-www-form-urlencoded",
            "Connection: keep-alive",
            'Referer:http://www.baidu.com',
            // The User-Agent matters: per the original author, a poorly
            // simulated one leads to a verification page instead of results.
            'User-Agent: Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.146 Safari/537.36',
            'Cookie:' . "BAIDUID=" . $baiduid . ":FG=1;",
        ];

        $content = $this->httpGet($url, $header);
        if ($content === null) {
            return null;
        }

        $res = json_decode($content, true);
        if (!is_array($res)) {
            return null;
        }

        return $res['data']['info']['current'] ?? null;
    }

    /**
     * Perform a GET request with cURL.
     *
     * @param string $url target URL
     * @param array $header request header lines
     * @param bool $includeResponseHeader when true, response headers are part of the returned text
     * @return string|null response text, or null when the transfer failed
     */
    private function httpGet(string $url, array $header, bool $includeResponseHeader = false): ?string
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        // Without a timeout one stuck request would stall its whole batch.
        curl_setopt($ch, CURLOPT_TIMEOUT, self::HTTP_TIMEOUT);
        if ($includeResponseHeader) {
            curl_setopt($ch, CURLOPT_HEADER, 1);
        }

        $content = curl_exec($ch);
        if (!is_string($content)) {
            echo "error:" . curl_error($ch);
            $content = null;
        }
        curl_close($ch);

        return $content;
    }
}
普通流程下 PHP curl 大约一秒查询一个单号,爬取二十个就要二十秒;加了协程之后,1000 单大概只需 3 秒左右。上面的代码使用了 WaitGroup 等待协程:如果不加它,一次性执行几千几万条,就会出现 Allowed memory size of 268435456 bytes exhausted(内存溢出)。所以我们一次只执行一百个协程,等这一百个协程全部执行完成后,再执行下一批一百个协程。
转载请注明: 藏羚骸的博客~hyperf协程免费查询快递物流.