一:通过curl抓取网站的产品列表url并入库
<?php

namespace App\Console\Commands\Cacuqecig;

use Illuminate\Console\Command;
use App\Models\Cacuqecig\Cacuqecig;

/**
 * Artisan command that walks the cacuqecig.com search listing page by page
 * and stores every product URL it finds through the Cacuqecig model.
 */
class Product extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'loadProductFromCacuqecig';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'loadProductFromCacuqecig';

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * Requests search pages 1..999 (48 items per page) and stops at the first
     * page that yields no product entries. Each product URL is stored once.
     *
     * @return int Exit status; 0 on completion. An integer is returned because
     *             newer Laravel versions cast the handler's return value to the
     *             process exit code, so `true` would be reported as status 1.
     */
    public function handle()
    {
        // Captures the SKU attribute and the first link of every product <li>.
        $pattern = '/<li skuid="(?<sku>.*?)" data-pro="[0-9]">(.*?)<a href="(?<url>.*?)"\>/';

        for ($n = 1; $n < 1000; ++$n) {
            $url = "https://www.cacuqecig.com/search.html?page_size=48&p={$n}";
            $rtn = $this->get($url);

            if ($rtn === '') {
                // An empty body means the request failed or returned nothing;
                // say so instead of ending the crawl silently.
                $this->warn("Empty response for {$url}; stopping.");
                break;
            }

            preg_match_all($pattern, $rtn, $matches);
            if (empty($matches['sku'])) {
                // No products on this page: we are past the last page.
                break;
            }

            foreach ($matches['url'] as $productUrl) {
                // The URL is the only stored attribute, so an existing row needs
                // no update. The previous code called Cacuqecig::updated(), which
                // registers a model-event listener rather than updating a row.
                Cacuqecig::firstOrCreate(['url' => $productUrl]);
            }
        }

        return 0;
    }

    /**
     * Fetch a URL with cURL and return the body with all newlines removed.
     *
     * @param string $url Absolute URL to request.
     *
     * @return string Response body without "\n" characters, or an empty string
     *                when the request fails.
     */
    public function get($url)
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, false);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // NOTE(review): peer certificate verification is disabled, which allows
        // man-in-the-middle tampering. Kept for compatibility; enable if the
        // host has a valid CA bundle.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);

        $rtn = curl_exec($ch);
        curl_close($ch);

        if ($rtn === false) {
            return '';
        }

        // Newlines are stripped so the line-oriented regex in handle() can
        // match markup that spans several source lines.
        return str_replace("\n", '', $rtn);
    }
}
二:数据库获取产品列表url,遍历详情,正则匹配所要数据,包括模拟用户登录
<?php

namespace App\Console\Commands\Cacuqecig;

use App\Exceptions\Handler;
use Illuminate\Console\Command;
use App\Console\Commands\Cacuqecig\Product;
use App\Models\Elegomall\Elegomall;
use App\Models\Cacuqecig\Cacuqecig;

/**
 * Artisan command that logs in to cacuqecig.com, downloads the detail page of
 * every stored product URL, extracts the child products (SKU, attribute,
 * stock, price) with regular expressions and stores one Elegomall row per
 * child, including the stock difference against the previous day.
 */
class ProductInfo extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'loadProductInfoFromCacuqecig';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'loadProductInfoFromCacuqecig';

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * @return mixed
     */
    public function handle()
    {
        $root = 'https://www.cacuqecig.com';
        $cookie = base_path() . '/cookie_cacuqecig.txt';
        $this->login($cookie);

        // Read the product list from the database and download each detail page.
        $products = Cacuqecig::select('url')->get()->toArray();

        foreach ($products as $product) {
            $url = "{$root}{$product['url']}";
            $html = $this->get($url, $cookie);

            if ($html === '') {
                // Failed or empty download: nothing to parse for this product.
                $this->warn("Empty response for {$url}; skipping.");
                continue;
            }

            foreach ($this->extractChildren($html, $url) as $child) {
                $this->storeChild($child);
            }
        }
    }

    /**
     * Parse a product detail page into its child products.
     *
     * @param string $html Detail page markup with newlines already removed.
     * @param string $url  Page URL, stored in each child's 'type' field.
     *
     * @return array Child rows keyed by match index, each with the keys
     *               title, img, price, sku, attr, origin, type and qty.
     *               Empty when the page has no "list-mods" section.
     */
    private function extractChildren($html, $url)
    {
        // Everything after each '"list-mods' marker is treated as one section.
        $sections = explode('"list-mods', $html);
        if (empty($sections[1])) {
            return [];
        }
        unset($sections[0]);

        preg_match_all('/<div class="pro_show_name">(\s*.*?)<\/div>/s', $html, $titleMatches);
        preg_match_all('/<div class="list-img"><img.*? src=\"(.*?)\".*?\/><\/div>/s', $html, $imgMatches);
        preg_match_all('/<span class="price".*?>(.*?)<\/span>/is', $html, $priceMatches);

        // Child SKUs and attributes are read from the first section only.
        $attrPattern = '/<div class="pro-attr" title="(?<sku>.*?)">(?<attr>.*?)<span/';
        preg_match_all($attrPattern, $sections[1], $attrMatches);

        $children = [];
        foreach ($attrMatches['sku'] as $k => $sku) {
            $child = [
                'title' => $titleMatches[1][0] ?? 0,
                'img' => $imgMatches[1][0] ?? 0,
                // The second price span on the page is the one that is stored.
                'price' => $priceMatches[1][1] ?? 0,
                'sku' => $sku,
                'attr' => $attrMatches['attr'][$k] ?? '',
                'origin' => 'cacuqecig',
                'type' => $url,
            ];

            // The SKU comes from scraped markup; quote it so characters such as
            // '/', '.', '+' or '(' cannot break or alter the pattern.
            $stockPattern = '/<span class="liststock' . preg_quote($sku, '/') . '">(?<qty>.*?)<\/span>/';

            // NOTE(review): every section overwrites 'qty', so the value from
            // the LAST section wins (0 if the SKU is absent there). Confirm
            // whether per-section stock should be summed instead.
            foreach ($sections as $section) {
                preg_match($stockPattern, $section, $stockMatch);
                $child['qty'] = $stockMatch['qty'] ?? 0;
            }

            $children[$k] = $child;
        }

        return $children;
    }

    /**
     * Persist one child product together with its day-over-day stock change.
     *
     * @param array $child Row produced by extractChildren().
     *
     * @return void
     */
    private function storeChild(array $child)
    {
        $createdAt = date_format(now(), 'Y-m-d');

        // Calendar arithmetic instead of "minus 86400 seconds": subtracting a
        // fixed 24h from midnight skips a day when a DST change shortens it.
        $previousDay = date('Y-m-d', strtotime('-1 day', strtotime($createdAt)));

        // NOTE(review): this lookup does not filter on 'origin', so a row from
        // another source with the same title/color could match — confirm.
        $previous = Elegomall::select('title', 'color', 'stock')
            ->where('title', $child['title'])
            ->where('color', $child['attr'])
            ->where('created_at', $previousDay)
            ->first();

        $previousStock = !empty($previous->stock) ? $previous->stock : 0;

        Elegomall::create([
            'title' => $child['title'],
            'product' => $child['title'],
            'img' => $child['img'],
            'sku' => $child['sku'],
            'color' => $child['attr'],
            'origin' => $child['origin'],
            'types' => $child['type'],
            'stock' => $child['qty'],
            'price' => $child['price'],
            'created_at' => $createdAt,
            // Today minus previous day: positive = restocked, negative = sold.
            'diff' => $this->toNumber($child['qty']) - $this->toNumber($previousStock),
        ]);
    }

    /**
     * Coerce a scraped or stored value to a number for arithmetic.
     *
     * Numeric strings keep their value; anything else falls back to an integer
     * cast, so an empty or non-numeric capture yields 0 instead of raising a
     * TypeError on PHP 8.
     *
     * @param mixed $value
     *
     * @return int|float
     */
    private function toNumber($value)
    {
        $value = trim((string) $value);

        return is_numeric($value) ? $value + 0 : (int) $value;
    }

    /**
     * Submit the login form and save the session cookies to a file.
     *
     * The response is not inspected, so a rejected login is not detected here.
     *
     * @param string $cookie Path of the cookie jar file to write.
     *
     * @return string The same cookie jar path.
     */
    public function login($cookie)
    {
        // NOTE(review): credentials are hard-coded in source control; move them
        // to configuration/environment and rotate the password.
        $post = [
            'email' => '1279991307@qq.com',
            'password' => 'cy123456',
        ];
        $UserAgent = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; .NET CLR 3.0.04506; .NET CLR 3.5.21022; .NET CLR 1.0.3705; .NET CLR 1.1.4322)';
        $headers = [
            "Content-type: application/x-www-form-urlencoded; charset=UTF-8",
        ];

        $curl = curl_init();
        curl_setopt($curl, CURLOPT_URL, "https://www.cacuqecig.com/user/loginAjax"); // login endpoint
        curl_setopt($curl, CURLOPT_HEADER, false);          // do not include response headers
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);   // return the body instead of printing it
        // NOTE(review): certificate verification is off while a password is
        // being sent; enable it if the host has a valid CA bundle.
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie);     // cookies are written here on close
        curl_setopt($curl, CURLOPT_POST, 1);
        curl_setopt($curl, CURLOPT_POSTFIELDS, http_build_query($post));
        curl_setopt($curl, CURLOPT_USERAGENT, $UserAgent);
        curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);
        curl_exec($curl);
        curl_close($curl); // closing the handle flushes the cookie jar to disk

        return $cookie;
    }

    /**
     * Fetch a URL with the stored session cookies and strip all newlines.
     *
     * @param string $url    Absolute URL to request.
     * @param string $cookie Path of the cookie file written by login().
     *
     * @return string Response body without "\n" characters, or an empty string
     *                when the request fails.
     */
    public function get($url, $cookie)
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, false);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie); // send the saved cookies

        $rtn = curl_exec($ch);
        curl_close($ch);

        if ($rtn === false) {
            return '';
        }

        return str_replace("\n", '', $rtn);
    }
}
三:配置定时任务
/**
 * Register the crawler commands with the scheduler.
 *
 * Every command listed below is scheduled with daily() and
 * withoutOverlapping(), in the order given.
 *
 * @param Schedule $schedule
 *
 * @return void
 */
protected function schedule(Schedule $schedule)
{
    $dailyCommands = [
        'loadProductFromElegomall',
        'loadProductFromCacuqecig',
        'loadProductInfoFromCacuqecig',
        'loadProductFromDemandvape',
        'loadProductInfoFromDemandvape',
    ];

    foreach ($dailyCommands as $commandName) {
        $event = $schedule->command($commandName);
        $event->daily()->withoutOverlapping();
    }
}
四:运行代码,获取数据
win: cmd命令行执行 php artisan xxxxxxxxxxxxxx
Linux: 执行 crontab -e,添加一行 * * * * * cd /项目路径 && php artisan schedule:run >> /dev/null 2>&1
五:数据展示