php-webdriver:PHP控制浏览器动作爬取京东商品

<?php

namespace App\Console\Commands;

use Facebook\WebDriver\Chrome\ChromeDriver;
use Facebook\WebDriver\WebDriverBy;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\Log;

/**
 * Artisan command that drives a local Chrome instance through php-webdriver,
 * searches jd.com for a keyword and walks the paginated result list,
 * printing every product to the console and writing each page to the log.
 */
class SyncWebService extends Command
{
    /**
     * The name and signature of the console command.
     *
     * The optional {keywords} argument is the search term typed into the
     * search box; a built-in default is used when it is omitted.
     *
     * @var string
     */
    protected $signature = 'longer:sync-check-system {keywords?}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Command description';

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * Points php-webdriver at the chromedriver binary and starts the crawl.
     * A WEBDRIVER_CHROME_DRIVER value already present in the environment is
     * respected; the hard-coded Windows path is only a fallback.
     */
    public function handle()
    {
        if (getenv('WEBDRIVER_CHROME_DRIVER') === false) {
            putenv('WEBDRIVER_CHROME_DRIVER=D://python/chromedriver.exe');
        }

        $this->startGoogleService();
    }

    /**
     * Open the login page, save a screenshot to login.png and print the
     * on-screen position of the captcha canvas (#s-canvas) together with the
     * result of getimagesize() for the screenshot.
     *
     * The browser session is always shut down, even when a step fails.
     */
    protected function getVerifyCode()
    {
        $driver = ChromeDriver::start();

        try {
            $driver->manage()->window()->maximize();
            $driver->get('https://www.fanglonger.com/login');
            $driver->takeScreenshot('login.png');
            sleep(4);

            $canvas = $driver->findElement(WebDriverBy::cssSelector('#s-canvas'));
            $point = $canvas->getLocationOnScreenOnceScrolledIntoView();
            $imageInfo = getimagesize('login.png');

            // The location is a WebDriverPoint object whose fields are private,
            // so it must be unpacked explicitly or json_encode() emits "{}".
            $this->info(json_encode([
                ['x' => $point->getX(), 'y' => $point->getY()],
                $imageInfo,
            ]));
        } finally {
            $this->shutdownDriver($driver);
        }
    }

    /**
     * Start Chrome, open the shop front page, submit the keyword search and
     * hand the result page over to getRequestJDItems().
     *
     * @param string $url Page that contains the search box (element id "key").
     */
    protected function startGoogleService($url = 'https://www.jd.com')
    {
        $driver = ChromeDriver::start();

        try {
            $driver->manage()->window()->maximize();
            $driver->get($url);

            $searchBox = $driver->findElement(WebDriverBy::id('key'));
            $searchBox->sendKeys($this->argument('keywords') ?? '飞天茅台');
            sleep(2);

            $driver->findElement(WebDriverBy::className('button'))->click();
            sleep(3);
        } catch (\Throwable $e) {
            // getRequestJDItems() owns the teardown once it is running; a
            // failure before that point has to release the browser here.
            $this->shutdownDriver($driver);

            throw $e;
        }

        $this->getRequestJDItems($driver);
    }

    /**
     * Scrape the current result page and every following page.
     *
     * For each page: scroll through it, read every ".gl-item" card, print each
     * record, log the whole page, then click "next" until the current page
     * number reaches the total. The driver session is shut down when the walk
     * ends, whether it finished normally or threw.
     *
     * @param ChromeDriver $driver Session already showing a search result page.
     */
    protected function getRequestJDItems(ChromeDriver $driver)
    {
        try {
            // Iterative on purpose: one stack frame regardless of page count.
            while (true) {
                $this->scrollThroughPage($driver);

                $records = [];
                foreach ($driver->findElements(WebDriverBy::className('gl-item')) as $item) {
                    $record = $this->extractItem($item);
                    $this->info(json_encode($record, JSON_UNESCAPED_UNICODE));
                    $records[] = $record;
                }

                $currentPage = $this->readPageNumber($driver, '#J_bottomPage .curr');
                $totalPage = $this->readPageNumber($driver, '#J_bottomPage .p-skip b');

                // NOTE(review): the error level is kept because this log line
                // is the only place the scraped data is persisted; confirm the
                // channel's minimum level before lowering it.
                Log::error(json_encode($records, JSON_UNESCAPED_UNICODE));

                // Also covers a missing pagination bar (both numbers are 0).
                if ($currentPage >= $totalPage) {
                    break;
                }

                $driver->findElement(WebDriverBy::className('pn-next'))->click();
                $this->info($driver->getCurrentURL());
                // Pause 30 seconds between pages.
                sleep(30);
                $this->info('休眠30秒');
            }
        } finally {
            $this->shutdownDriver($driver);
        }
    }

    /**
     * Check whether an element exists anywhere on the current page.
     *
     * The lookup is page-wide, not scoped to a single result card. It is kept
     * for subclasses that may call it; the scraper itself uses card-scoped
     * lookups instead.
     *
     * @param ChromeDriver $driver
     * @param WebDriverBy $locator
     * @return bool
     */
    protected function hasElement(ChromeDriver $driver, WebDriverBy $locator): bool
    {
        try {
            $driver->findElement($locator);
            return true;
        } catch (\Exception $e) {
            $this->error($e->getMessage());
            return false;
        }
    }

    /**
     * Scroll down the page in five steps (10%, 30%, ... 90% of its height),
     * waiting two seconds before each step and echoing the script that is run.
     *
     * @param ChromeDriver $driver
     */
    private function scrollThroughPage(ChromeDriver $driver)
    {
        foreach (range(1, 9, 2) as $k) {
            sleep(2);
            $js = "document.documentElement.scrollTop = document.documentElement.scrollHeight * {$k} / 10";
            $this->info($js);
            $driver->executeScript($js);
        }
    }

    /**
     * Build the record for one result card.
     *
     * Every lookup is scoped to the card itself, so a card that lacks a child
     * yields an empty string for that field instead of throwing.
     *
     * @param \Facebook\WebDriver\WebDriverElement $item One ".gl-item" element.
     * @return array Keys: item_url, src, price, name, commit, shop_name, shop_url.
     */
    private function extractItem(\Facebook\WebDriver\WebDriverElement $item): array
    {
        $record = [
            // Product link
            'item_url' => $this->childAttribute($item, '.p-img a', 'href'),
            // Product image
            'src' => $this->childAttribute($item, '.p-img img', 'src'),
            // Product price
            'price' => $this->childText($item, '.p-price i'),
            // Product name
            'name' => $this->childText($item, '.p-name em'),
            // Review count
            'commit' => $this->childText($item, '.p-commit strong'),
            // Shop name
            'shop_name' => $this->childText($item, '.p-shop'),
            // Shop link, only looked up when a shop name was found
            'shop_url' => '',
        ];

        if ($record['shop_name']) {
            $record['shop_url'] = $this->childAttribute($item, '.p-shop a', 'href');
        }

        return $record;
    }

    /**
     * Return the first descendant of $parent matching the CSS selector, or
     * null when there is none. findElements() returns an empty array instead
     * of throwing, which makes it the safe existence check.
     *
     * @param \Facebook\WebDriver\WebDriverElement $parent
     * @param string $css
     * @return \Facebook\WebDriver\WebDriverElement|null
     */
    private function firstChild(\Facebook\WebDriver\WebDriverElement $parent, string $css)
    {
        $matches = $parent->findElements(WebDriverBy::cssSelector($css));

        return $matches ? $matches[0] : null;
    }

    /**
     * Visible text of the first matching descendant, or '' when absent.
     *
     * @param \Facebook\WebDriver\WebDriverElement $parent
     * @param string $css
     * @return string
     */
    private function childText(\Facebook\WebDriver\WebDriverElement $parent, string $css): string
    {
        $child = $this->firstChild($parent, $css);

        return $child === null ? '' : (string) $child->getText();
    }

    /**
     * Attribute value of the first matching descendant, or '' when either the
     * element or the attribute is absent.
     *
     * @param \Facebook\WebDriver\WebDriverElement $parent
     * @param string $css
     * @param string $attribute
     * @return string
     */
    private function childAttribute(\Facebook\WebDriver\WebDriverElement $parent, string $css, string $attribute): string
    {
        $child = $this->firstChild($parent, $css);

        return $child === null ? '' : (string) ($child->getAttribute($attribute) ?? '');
    }

    /**
     * Read a page number from the pagination bar.
     *
     * @param ChromeDriver $driver
     * @param string $css Selector of the element holding the number.
     * @return int The parsed number, or 0 when the element is not on the page.
     */
    private function readPageNumber(ChromeDriver $driver, string $css): int
    {
        $matches = $driver->findElements(WebDriverBy::cssSelector($css));

        return $matches ? intval($matches[0]->getText()) : 0;
    }

    /**
     * End the browser session with quit() rather than close(): quit() ends
     * the whole session, while close() only closes the current window.
     * A failure while quitting is reported but never masks the caller's error.
     *
     * @param ChromeDriver $driver
     */
    private function shutdownDriver(ChromeDriver $driver)
    {
        try {
            $driver->quit();
        } catch (\Exception $e) {
            $this->error($e->getMessage());
        }
    }
}

  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值