hyperf协程免费查询快递物流

7 篇文章 1 订阅
5 篇文章 0 订阅

转载请注明: 藏羚骸的博客~hyperf协程免费查询快递物流.

小伙伴们可以先看《免费查询快递物流》这篇文章,文章详细介绍了 PHP 常规方式爬取物流信息的思路。本文是在上一篇文章的基础上加入了协程,属于升级版。之前还写过一篇《hyperf协程大批量匹配快递物流信息》的文章,其中也介绍了协程的执行速度相比传统方式有质的飞跃,这里用 hyperf协程免费查询快递物流 。

php7.2+swoole4+hyperf2.0

Hyperf\config\autoload\server.php配置为

<?php

declare(strict_types=1);
/**
 * This file is part of Hyperf.
 *
 * @link     https://www.hyperf.io
 * @document https://hyperf.wiki
 * @contact  group@hyperf.io
 * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
 */
use Hyperf\Server\Server;
use Hyperf\Server\SwooleEvent;

// Server definition returned to the framework: one HTTP server plus the
// settings and lifecycle callbacks listed below.
return [
    'mode' => SWOOLE_PROCESS,
    'servers' => [
        [
            // HTTP server reachable on every interface (0.0.0.0) at port 8080.
            'name' => 'http',
            'type' => Server::SERVER_HTTP,
            'host' => '0.0.0.0',
            'port' => 8080,
            'sock_type' => SWOOLE_SOCK_TCP,
            'callbacks' => [
                // Incoming requests are handed to Hyperf's HTTP server class.
                SwooleEvent::ON_REQUEST => [Hyperf\HttpServer\Server::class, 'onRequest'],
            ],
        ],
    ],
    'settings' => [
        'enable_coroutine' => true,
        // One worker per CPU, as reported by swoole_cpu_num().
        'worker_num' => swoole_cpu_num(),
        'pid_file' => BASE_PATH . '/runtime/hyperf.pid',
        'open_tcp_nodelay' => true,
        'max_coroutine' => 100000,
        'open_http2_protocol' => true,
        'max_request' => 100000,
        // 2 * 1024 * 1024 = 2 MiB, assuming the unit is bytes — TODO confirm.
        'socket_buffer_size' => 2 * 1024 * 1024,
        'buffer_output_size' => 2 * 1024 * 1024,
        // The curl hook is OR-ed in explicitly; the controller's curl_exec() calls
        // run inside coroutines and presumably rely on it to be non-blocking.
        // NOTE(review): confirm whether SWOOLE_HOOK_ALL already includes
        // SWOOLE_HOOK_CURL on the installed Swoole version.
        'hook_flags' => SWOOLE_HOOK_ALL | SWOOLE_HOOK_CURL,
        // NOTE(review): presumably detaches the server into the background —
        // confirm this is wanted outside production (output may not reach the terminal).
        'daemonize' => true,
        
    ],
    // Server-wide lifecycle callbacks, all delegated to Hyperf bootstrap classes.
    'callbacks' => [
        SwooleEvent::ON_WORKER_START => [Hyperf\Framework\Bootstrap\WorkerStartCallback::class, 'onWorkerStart'],
        SwooleEvent::ON_PIPE_MESSAGE => [Hyperf\Framework\Bootstrap\PipeMessageCallback::class, 'onPipeMessage'],
        SwooleEvent::ON_WORKER_EXIT => [Hyperf\Framework\Bootstrap\WorkerExitCallback::class, 'onWorkerExit'],
    ],
];

Hyperf\config\routes.php 配置为

// GET /getkdstatus2 is dispatched to KuaidiController::getkdstatus2().
Router::get('/getkdstatus2', [\App\Controller\KuaidiController::class, 'getkdstatus2']);

Hyperf\app\Controller\KuaidiController.php

<?php

declare(strict_types=1);
/**
 * This file is part of Hyperf.
 *
 * @link     https://www.hyperf.io
 * @document https://hyperf.wiki
 * @contact  group@hyperf.io
 * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
 */
namespace App\Controller;
use QL\QueryList;
use Hyperf\DbConnection\Db;
use  Hyperf\HttpServer\Contract\RequestInterface;
/**
 * Looks up express-tracking statuses through Baidu's express API and stores
 * them on the `yunorders` table, issuing the HTTP requests from coroutines.
 */
class KuaidiController extends AbstractController
{
    /** Orders loaded and queried concurrently per batch; bounds the number of live coroutines. */
    private const BATCH_SIZE = 100;

    /** Seconds allowed for establishing a connection. */
    private const CONNECT_TIMEOUT = 5;

    /** Seconds allowed for a whole request, so a stalled request cannot block a batch forever. */
    private const REQUEST_TIMEOUT = 30;

    /**
     * Refresh the tracking status of every order with status 3 whose `current`
     * column equals the `current` request parameter (default '未获取').
     *
     * Orders are handled in batches of BATCH_SIZE: one coroutine per order, and
     * the next batch only starts once the whole batch has finished. Per the
     * original author, launching thousands of coroutines at once exhausted the
     * memory limit ("Allowed memory size of 268435456 bytes exhausted").
     *
     * @param RequestInterface $request request carrying the optional `current` filter
     * @return string always 'ok'
     * @throws \RuntimeException when the Baidu query token cannot be obtained
     */
    public function getkdstatus2(RequestInterface $request)
    {
        $current = $request->input('current', '未获取');

        // Count in SQL instead of loading every matching row just to count it.
        $total = Db::table('yunorders')->where('status', 3)->where('current', $current)->count();
        $pages = (int) ceil($total / self::BATCH_SIZE);

        // Pages are walked from last to first: a successful update removes the row
        // from the filtered set, which only shifts rows at higher offsets and
        // leaves the not-yet-visited earlier pages untouched. The explicit
        // ordering keeps the page boundaries deterministic between queries.
        for ($page = $pages - 1; $page >= 0; $page--) {
            $orders = Db::table('yunorders')
                ->where('status', 3)
                ->where('current', $current)
                ->orderBy('id')
                ->offset($page * self::BATCH_SIZE)
                ->limit(self::BATCH_SIZE)
                ->get(['id', 'expressCode']);
            $batchSize = $orders->count();
            if ($batchSize === 0) {
                continue;
            }

            $info = $this->getinfo();
            $wg = new \Hyperf\Utils\WaitGroup();
            $wg->add($batchSize);
            foreach ($orders as $order) {
                // Normalise the row to an array whether it arrives as an object or an array.
                $order = (array) $order;
                co(function () use ($order, $info, $wg) {
                    try {
                        $this->refreshOrder($order, $info['tokenV2']);
                    } finally {
                        // Always release the wait group; otherwise one failing
                        // coroutine would leave wait() below blocked forever.
                        $wg->done();
                    }
                });
            }
            // Block until every coroutine of this batch has finished.
            $wg->wait();
        }

        return 'ok';
    }

    /**
     * Build a random string by shuffling the 62 alphanumeric characters and
     * taking the first $length of them (so no character repeats).
     *
     * @param int $length number of characters to return
     * @return string
     */
    public function getrandstr($length)
    {
        $str = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890';

        return substr(str_shuffle($str), 0, $length);
    }

    /**
     * Fetch a Baidu search result page for "快递" and extract the tokenV2 value
     * needed by the express API.
     *
     * @return array ['tokenV2' => string]
     * @throws \RuntimeException when the page cannot be fetched or contains no token
     */
    public function getinfo()
    {
        // `tn` names the site the search was submitted from; per the original
        // author the result is unstable without it.
        $url = 'https://www.baidu.com/s?tn=02003390_43_hao_pg&isource=infinity&&ie=utf-8&wd=%E5%BF%AB%E9%80%92';
        $header = [
            "Host:www.baidu.com",
            "Content-Type:application/x-www-form-urlencoded",
            "Connection: keep-alive",
            'Referer:http://www.baidu.com',
            // Per the original author the User-Agent is decisive: with a poorly
            // imitated one Baidu asks for verification instead of returning results.
            'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36',
        ];

        $content = $this->httpGet($url, $header, true);
        // Fail loudly: continuing with an empty token would make every
        // subsequent tracking request useless.
        if ($content === null || preg_match('/tokenV2=(.*?)"/i', $content, $match) !== 1) {
            throw new \RuntimeException('Unable to obtain tokenV2 from the Baidu search page');
        }

        return ['tokenV2' => $match[1]];
    }

    /**
     * Query Baidu's express API for one order and store the returned status.
     * The row is left untouched when the request fails or the response carries
     * no status, so it is picked up again on the next run.
     *
     * @param array  $order   row with the keys 'id' and 'expressCode'
     * @param string $tokenV2 token obtained from getinfo()
     */
    private function refreshOrder(array $order, string $tokenV2)
    {
        $url = 'https://express.baidu.com/express/api/express?tokenV2=' . $tokenV2
            . '&nu=' . rawurlencode((string) $order['expressCode']);
        // A fresh random BAIDUID cookie is sent with every request.
        $baiduid = ucfirst(md5($this->getrandstr(6) . rand(10000, 99999)));
        $header = [
            "Host:express.baidu.com",
            "Content-Type:application/x-www-form-urlencoded",
            "Connection: keep-alive",
            'Referer:http://www.baidu.com',
            'User-Agent: Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.146 Safari/537.36',
            'Cookie:' . "BAIDUID=" . $baiduid . ":FG=1;",
        ];

        $content = $this->httpGet($url, $header);
        if ($content === null) {
            return;
        }

        $res = json_decode($content, true);
        if (!isset($res['data']['info']['current'])) {
            echo "error: no tracking status returned for order " . $order['id'] . PHP_EOL;
            return;
        }

        Db::table('yunorders')->where('id', $order['id'])->update([
            'updated_at' => date("Y-m-d H:i:s"),
            'current' => $res['data']['info']['current'],
        ]);
    }

    /**
     * Perform a GET request with cURL and return the response.
     *
     * @param string $url        address to request
     * @param array  $header     raw header lines
     * @param bool   $withHeader whether the response headers are included in the returned string
     * @return string|null response, or null when cURL reports a failure (the error is echoed)
     */
    private function httpGet(string $url, array $header, bool $withHeader = false): ?string
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, self::CONNECT_TIMEOUT);
        curl_setopt($ch, CURLOPT_TIMEOUT, self::REQUEST_TIMEOUT);
        if ($withHeader) {
            curl_setopt($ch, CURLOPT_HEADER, 1);
        }

        $content = curl_exec($ch);
        if ($content === false) {
            echo "error:" . curl_error($ch);
            $content = null;
        }
        curl_close($ch);

        return $content;
    }
}

普通流程下 php curl 大约一秒一个,爬取二十个就用了二十秒;加了协程后,1000单大概是3秒左右。上面加了一个等待协程(WaitGroup),如果不加这个,一下执行几千几万条,会有 Allowed memory size of 268435456 bytes exhausted 内存溢出,所以我们一次只执行一百个协程,等这一百个协程执行完成后再执行下一批一百个协程。

转载请注明: 藏羚骸的博客~hyperf协程免费查询快递物流.

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值