PHP 批量抓取图片、网页等:使用 curl_multi 并发请求(准多进程)加快下载


trait Curl
{
    /**
     * @var array Map of source URL => local file path for files known to be on disk.
     */
    protected $downImageMap = [];

    /**
     * @var string Segment concatenated between the base path and the per-URL
     *             $mapData entry when building a target path.
     */
    protected $multiImageDownPath = 'item';

    /**
     * Delete every file recorded in $downImageMap and forget the ones that are gone.
     *
     * Entries whose file could not be removed stay in the map so a later call
     * can retry them.
     */
    public function clearDownImageCache()
    {
        foreach ($this->downImageMap as $url => $file) {
            // Deletion is best effort, so the warning of a failed unlink is
            // suppressed on purpose; the entry is only dropped once the file
            // no longer exists.
            if (!is_file($file) || @unlink($file)) {
                unset($this->downImageMap[$url]);
            }
        }
    }

    /**
     * Download a batch of URLs concurrently through the curl multi interface.
     *
     * Successfully downloaded files are written either below $path (when it
     * is given) or to a temporary file. Transfers that fail, return an HTTP
     * status >= 400, or yield an empty body are skipped so that no broken
     * file is left behind to be mistaken for a finished download later.
     *
     * @param array       $urls    URLs to download
     * @param int         $timeOut Per-transfer timeout in seconds (CURLOPT_TIMEOUT)
     * @param string|null $path    Base path for the downloaded files; when empty,
     *                             temporary files are used instead
     * @param array|null  $mapData Map of URL => sub-path appended after
     *                             $multiImageDownPath; segments are concatenated
     *                             as-is, so callers supply any separators
     * @param bool        $clear   When true, freshly downloaded files are recorded
     *                             in $downImageMap (and are therefore removed by
     *                             clearDownImageCache()); when false they are
     *                             written but not recorded
     *
     * @return false|null false when $urls is empty, otherwise nothing
     */
    protected function multiImageDown($urls, $timeOut = 60, $path = null, $mapData = null, $clear = true)
    {
        if (empty($urls)) {
            return false;
        }

        // Reuse files that are already on disk instead of downloading them again.
        foreach ($urls as $k => $url) {
            if (!empty($path) && !empty($mapData[$url])) {
                $tmpPath = $this->multiImageDownTarget($url, $path, $mapData);

                if (file_exists($tmpPath)) {
                    $this->downImageMap[$url] = $tmpPath;
                    unset($urls[$k]);
                }
            }
        }

        if (empty($urls)) {
            return;
        }

        $mh   = curl_multi_init();
        $conn = [];

        foreach ($urls as $i => $url) {
            $ch = curl_init($url);

            if ($ch === false) {
                continue;
            }

            curl_setopt_array($ch, [
                CURLOPT_USERAGENT      => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
                CURLOPT_HEADER         => 0,
                CURLOPT_CONNECTTIMEOUT => 20,
                CURLOPT_TIMEOUT        => $timeOut,
                CURLOPT_RETURNTRANSFER => true, // return the body as a string instead of printing it
                CURLOPT_FOLLOWLOCATION => 1,    // follow redirects
            ]);
            curl_multi_add_handle($mh, $ch);
            $conn[$i] = $ch;
        }

        $active = null;

        do {
            do {
                $status = curl_multi_exec($mh, $active);
            } while ($status === CURLM_CALL_MULTI_PERFORM);

            if ($status !== CURLM_OK) {
                break;
            }

            // Wait for activity on the transfers instead of polling in a tight
            // loop. When nothing is reported (no descriptors yet, timeout or
            // error) sleep briefly so the loop cannot spin at 100% CPU.
            if ($active && curl_multi_select($mh, 1.0) < 1) {
                usleep(1000);
            }
        } while ($active);

        // Only handles that reported a clean completion count as downloaded;
        // anything else (error, or aborted multi loop) is treated as failed.
        $succeeded = [];

        while (($info = curl_multi_info_read($mh)) !== false) {
            if ($info['result'] === CURLE_OK) {
                $i = array_search($info['handle'], $conn, true);

                if ($i !== false) {
                    $succeeded[$i] = true;
                }
            }
        }

        foreach ($conn as $i => $ch) {
            if (!isset($succeeded[$i])) {
                continue;
            }

            $url  = $urls[$i];
            $data = curl_multi_getcontent($ch); // response body as a string

            if (!is_string($data) || $data === '' || curl_getinfo($ch, CURLINFO_HTTP_CODE) >= 400) {
                continue;
            }

            if (!empty($path)) {
                $tmpPath = $this->multiImageDownTarget($url, $path, $mapData);
                $dir     = dirname($tmpPath);

                // Native, recursive directory creation keeps the trait usable
                // without a File facade; the trailing is_dir() covers another
                // process creating the directory at the same time.
                if (!is_dir($dir) && !@mkdir($dir, 0755, true) && !is_dir($dir)) {
                    continue;
                }
            } else {
                $ge      = explode('.', $url);
                $tmpPath = tempnam(sys_get_temp_dir(), 'IMAGECURL'.end($ge));

                if ($tmpPath === false) {
                    continue;
                }
            }

            if (file_put_contents($tmpPath, $data) === false) {
                continue;
            }

            if ($clear) {
                $this->downImageMap[$url] = $tmpPath;
            }
        }

        foreach ($conn as $ch) {
            curl_multi_remove_handle($mh, $ch);
            curl_close($ch);
        }

        curl_multi_close($mh);
    }

    /**
     * Build the on-disk target path of a URL below the given base path.
     *
     * @param string     $url     Source URL; its basename becomes the file name
     * @param string     $path    Base path
     * @param array|null $mapData Map of URL => sub-path; a missing entry
     *                            contributes an empty segment
     *
     * @return string
     */
    private function multiImageDownTarget($url, $path, $mapData)
    {
        $sub = isset($mapData[$url]) ? $mapData[$url] : '';

        return $path.$this->multiImageDownPath.$sub.'/'.basename($url);
    }
}


<?php
/**
 * Minimal example consumer of the Curl trait.
 */
class A
{
    use \Curl;

    /**
     * Download the given URLs with a 60 second timeout and return the
     * trait's map of downloaded files.
     *
     * @param array $urls URLs to download
     *
     * @return array The current contents of $downImageMap
     */
    public function get(array $urls)
    {
        $this->multiImageDown($urls, 60);

        return $this->downImageMap;
    }
}

?>

<?php

// Usage example: download two images concurrently and dump the resulting map.
$imageUrls = [
    "http://baidu.com/66.png",
    "http://haha.com/77.png",
];

$downloader = new A();
$downloaded = $downloader->get($imageUrls);

var_dump($downloaded);

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值