php多进程采集百度,php实现多进程下载百度网盘文件

大家知道, 百度网盘对非会员有下载限速, 最大速度基本维持在 100 kB/s 以内, 要下个电影啥的, 那就有得等了。之前还可以把网盘里的文件链接解析出来, 放到 uGet 或迅雷之类的下载工具里多线程下载; 但现在百度的文件服务器对 User-Agent (UA) 做了校验, 我没找到好用的、可以编辑 HTTP header 的下载工具, 于是自己动手写了一个, 基本上可以充分利用已有的带宽。

1777b2f02655?from=groupmessage

download_demo.gif

PS: 请自行安装 swoole 扩展和 Guzzle HTTP 包。另外, 代码里用到的 dump() 函数来自 symfony/var-dumper, 也需要一并安装 (或者自行替换成 var_dump)。

直接上代码吧:

// Load Composer's autoloader (Guzzle and friends). `require` with a path
// anchored at this script's directory fails immediately and loudly when the
// dependencies are not installed, instead of emitting a warning and crashing
// later with an unrelated "class not found" error.
require __DIR__ . '/vendor/autoload.php';

// Alternative sample link:
// $service = new Service('http://peterq.cn/movie/api/video_redirect?fid=543468589252145', __DIR__);

// Download the file behind this link into the directory that contains this script.
$service = new Service('http://peterq.cn/movie/api/video_redirect?fid=364402848596280', __DIR__);

$service->start();

use GuzzleHttp\Client;

/**
 * Master process of the multi-process downloader.
 *
 * It probes the URL for the file name and size, forks a pool of worker
 * processes, hands each idle worker one byte range ("segment") at a time over
 * its pipe, and prints progress as workers report downloaded bytes.
 *
 * Messages exchanged over the pipes are serialized arrays of the form
 * ['type' => string, 'data' => mixed]:
 *   master -> worker: 'new-segment', 'exit'
 *   worker -> master: 'range' (bytes just written), 'taskFinished'
 */
class Service
{
    /**
     * @var Client Guzzle client created in start(), before the workers are started;
     *             the worker callbacks use the same property.
     */
    protected $client;

    protected $worker_pool = []; // every worker process, keyed by worker index

    protected $available_worker_queue; // SplQueue of workers that currently have no segment assigned

    protected $worker_number = 16; // number of workers to start; a small file may not use all of them (depends on segment size)

    protected $segment_size = 1048576; // bytes handed to a worker per task (1 MB)

    protected $started = false; // whether start() has already run

    protected $url; // download link

    protected $length; // total file size in bytes

    protected $dir; // directory the file is saved into

    protected $filename; // absolute path of the target file

    protected $downloaded = 0; // bytes downloaded so far, summed over all workers

    protected $speedArr = []; // unix second => bytes received in that second, used for the speed display

    protected $distributed = 0; // file offset up to which segments have been handed out

    /**
     * @param string $url download link
     * @param string $dir existing directory the file should be saved into
     *
     * @throws \InvalidArgumentException when $dir does not resolve to a directory
     */
    public function __construct($url, $dir)
    {
        // realpath() returns false for a missing path; concatenating that would
        // silently turn the target into "/<filename>".
        $resolved = realpath($dir);
        if ($resolved === false || !is_dir($resolved)) {
            throw new \InvalidArgumentException('save directory does not exist: ' . $dir);
        }

        $this->url = $url;
        $this->dir = $resolved;
    }

    /**
     * Create the HTTP client and the worker pool, then begin the download.
     * Calling it a second time is a no-op.
     */
    public function start()
    {
        if ($this->started) {
            return;
        }

        $this->started = true;
        $this->available_worker_queue = new SplQueue();

        // Browser-like headers; the User-Agent matters because the file server validates it.
        $this->client = new Client([
            'headers' => [
                "Accept" => "application/json, text/javascript, text/html, */*; q=0.01",
                "Accept-Encoding" => "gzip, deflate, sdch",
                "Accept-Language" => "en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2",
                "Referer" => "http://pan.baidu.com/disk/home",
                "X-Requested-With" => "XMLHttpRequest",
                "User-Agent" => "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36",
                "Connection" => "keep-alive",
            ],
        ]);

        swoole_set_process_name('download-master');
        echo 'master pid:' . posix_getpid() . PHP_EOL;

        for ($i = 0; $i < $this->worker_number; $i++) {
            $process = $this->createProcess($i);
            $this->worker_pool[$i] = $process;

            // Watch the worker's pipe in the event loop so its messages are handled asynchronously.
            swoole_event_add($process->pipe, function ($pipe) use ($process) {
                $message = $this->decodeMessage($process->read());
                if ($message === null) {
                    return; // empty or failed read (e.g. the worker went away): nothing to dispatch
                }
                $this->handleChildMessage($process, $message['type'], $message['data']);
            });

            $process->start();
            $this->available_worker_queue->enqueue($process);
        }

        // Reap exited workers; once every worker is gone the master exits too.
        swoole_process::signal(SIGCHLD, function ($sig) {
            static $exited = 0;

            // wait(false) is the non-blocking form, so the handler never stalls the event loop.
            while ($ret = swoole_process::wait(false)) {
                echo "child process exited, PID={$ret['pid']}\n";
                $exited++;
                if ($exited === count($this->worker_pool)) {
                    exit();
                }
            }
        });

        $this->initDownload();
    }

    /**
     * Probe the URL for file name and size, create the empty target file and
     * hand out the first segments. On any unusable response the workers are
     * told to exit instead of being left waiting forever.
     */
    protected function initDownload()
    {
        $options = [
            'stream' => true,
            'read_timeout' => 10,
        ];

        $resp = $this->client->request('GET', $this->url, $options);

        // NOTE(review): this treats the body of a 301/302 response as the next URL.
        // Guzzle follows Location redirects on its own unless allow_redirects is
        // disabled, so this loop is presumably only relevant for that case - confirm.
        while (in_array($resp->getStatusCode(), [301, 302], true)) {
            $this->url = $resp->getBody()->read(1024);
            $resp->getBody()->close();
            dump('redirect: ' . $this->url);
            $resp = $this->client->request('GET', $this->url, $options);
        }

        $dispositions = $resp->getHeader('Content-Disposition');
        $lengths = $resp->getHeader('Content-Length');

        // Only the headers are needed from this probe; close the streamed body so
        // the connection is not held open for the whole download.
        $resp->getBody()->close();

        $fname = $dispositions ? $this->parseFilename($dispositions[0]) : null;
        if ($fname === null) {
            $this->abort('not a file download url');
            return;
        }

        $this->length = $lengths ? intval($lengths[0]) : 0;
        if ($this->length <= 0) {
            // Without a size no segment can be planned and the progress maths would divide by zero.
            $this->abort('missing or empty Content-Length');
            return;
        }

        $this->filename = $this->dir . '/' . $fname;

        dump([
            '文件' => $this->filename,
            '大小' => round($this->length / 1024 / 1024, 2) . 'MB'
        ]);

        // Create/truncate the target; workers later open it with 'rb+' and seek to their offset.
        if (file_put_contents($this->filename, '') === false) {
            $this->abort('unable to create ' . $this->filename);
            return;
        }

        $this->download();
    }

    /**
     * Extract a safe base file name from a Content-Disposition header value.
     *
     * @param string $disposition raw header value
     *
     * @return string|null the file name, or null when the header carries no usable one
     */
    protected function parseFilename($disposition)
    {
        $marker = 'filename=';
        $pos = strpos($disposition, $marker);
        if ($pos === false) {
            return null;
        }

        $name = urldecode(substr($disposition, $pos + strlen($marker)));
        $name = trim($name, " \t\r\n\"'");

        // The name is server-controlled: keep only the last path component so it
        // cannot escape the save directory. '/' and '\' are single bytes that never
        // occur inside a UTF-8 multibyte sequence, so byte-wise handling is safe.
        $name = str_replace('\\', '/', $name);
        $slash = strrpos($name, '/');
        if ($slash !== false) {
            $name = substr($name, $slash + 1);
        }

        if ($name === '' || $name === '.' || $name === '..') {
            return null;
        }

        return $name;
    }

    /**
     * Report a fatal problem and shut the worker pool down.
     *
     * @param string $reason message shown to the user
     */
    protected function abort($reason)
    {
        dump($reason);
        $this->stopWorkers();
    }

    /**
     * Ask every worker to exit; the SIGCHLD handler ends the master once all are reaped.
     */
    protected function stopWorkers()
    {
        foreach ($this->worker_pool as $worker) {
            $worker->write(serialize([
                'type' => 'exit', 'data' => ''
            ]));
        }
    }

    /**
     * Decode one pipe message.
     *
     * @param string|false $raw value returned by swoole_process::read()
     *
     * @return array|null ['type' => ..., 'data' => ...] or null when $raw is not a valid message
     */
    protected function decodeMessage($raw)
    {
        if (!is_string($raw) || $raw === '') {
            return null;
        }

        // Messages only ever contain arrays and scalars, so refuse to instantiate objects.
        $message = unserialize($raw, ['allowed_classes' => false]);
        if (!is_array($message) || !isset($message['type'])) {
            return null;
        }

        return [
            'type' => $message['type'],
            'data' => isset($message['data']) ? $message['data'] : null,
        ];
    }

    /**
     * Hand one segment to each idle worker until the file is fully planned or no worker is idle.
     */
    protected function download()
    {
        while ($this->distributed < $this->length && !$this->available_worker_queue->isEmpty()) {
            $this->distributeSegment($this->available_worker_queue->dequeue());
        }
    }

    /**
     * Assign the next not-yet-distributed byte range to the given worker.
     *
     * @param swoole_process $process worker that receives the segment
     */
    protected function distributeSegment($process)
    {
        // The last segment is usually shorter than the configured segment size.
        $length = min($this->segment_size, $this->length - $this->distributed);

        $process->write(serialize([
            'type' => 'new-segment',
            'data' => [
                'url' => $this->url,
                'file' => $this->filename,
                'start' => $this->distributed,
                'length' => $length,
            ]
        ]));

        $this->distributed += $length;
    }

    /**
     * Route a worker message to the matching on<Type>() handler; unknown types are ignored.
     *
     * @param swoole_process $process worker that sent the message
     * @param string         $type    message type, e.g. 'range' maps to onRange()
     * @param mixed          $data    message payload
     */
    protected function handleChildMessage($process, $type, $data)
    {
        $handler = 'on' . ucfirst($type);
        if (method_exists($this, $handler)) {
            $this->{$handler}($process, $data);
        }
    }

    /**
     * A worker wrote another chunk: update the totals and redraw the progress line.
     *
     * @param swoole_process $process reporting worker
     * @param int            $data    number of bytes just written
     */
    protected function onRange(swoole_process $process, $data)
    {
        static $lastClearTime = 0;

        $this->downloaded += $data;

        $time = time();
        $this->speedArr[$time] = $this->speedArr[$time] ?? 0;
        $this->speedArr[$time] += $data;

        // Once per second, drop buckets older than 5 seconds. The displayed speed is
        // the average over the remaining buckets - a rough figure, not an exact one.
        if ($time > $lastClearTime) {
            $lastClearTime = $time;
            foreach ($this->speedArr as $t => $size) {
                if ($t < $time - 5) {
                    unset($this->speedArr[$t]);
                }
            }
        }

        $speed = array_sum($this->speedArr) / count($this->speedArr);
        $percent = round($this->downloaded / $this->length * 100, 2);
        $size = humanSize($this->downloaded);
        $speed = humanSize($speed);

        // "\r\033[2K" returns to column 0 and clears the line so the status overwrites itself.
        echo "\r\033[2K" . "已下载: $size, $percent%; 当前速度: $speed/s";
    }

    /**
     * A worker finished its segment: give it the next one, or park it; when every
     * worker is parked the download is complete and the pool is shut down.
     *
     * @param swoole_process $process worker that finished
     * @param mixed          $data    unused payload
     */
    protected function onTaskFinished($process, $data)
    {
        if ($this->distributed < $this->length) {
            $this->distributeSegment($process);
            return;
        }

        $this->available_worker_queue->enqueue($process);

        if ($this->available_worker_queue->count() === count($this->worker_pool)) {
            dump('文件下载完成');
            $this->stopWorkers();
        }
    }

    /**
     * Build (but do not start) one worker process.
     *
     * @param int|null $index worker number, used in the process name and log line
     *
     * @return swoole_process
     */
    protected function createProcess($index = null)
    {
        return new swoole_process(function (swoole_process $process) use ($index) {
            swoole_set_process_name('download worker' . $index);
            echo sprintf('worker:%s, pid:%s', $index, posix_getpid()) . PHP_EOL;

            // Listen for commands from the master on this worker's pipe.
            swoole_event_add($process->pipe, function ($pipe) use ($process) {
                $message = $this->decodeMessage($process->read());
                if ($message === null) {
                    return;
                }

                if ($message['type'] === 'exit') {
                    exit(0);
                }

                if ($message['type'] === 'new-segment') {
                    $segment = $message['data'];

                    // The download blocks this worker's event loop until the segment is done.
                    $downloader = new Downloader(
                        $process,
                        $this->client,
                        $segment['url'],
                        $segment['file'],
                        $segment['start'],
                        $segment['length']
                    );
                    $downloader->download();

                    $process->write(serialize([
                        'type' => 'taskFinished',
                        'data' => ''
                    ]));
                }
            });
        }, false, 2);
    }
}

// 下载器类

/**
 * Downloads one byte range of the remote file into the matching position of
 * the local file, reporting every written chunk to the master process.
 */
class Downloader
{
    protected $client; // Guzzle client used for the range request

    protected $process; // this worker's process object; its pipe carries the progress reports

    protected $file; // path of the local target file (must already exist)

    protected $url; // download link

    protected $start; // offset of the first byte of this segment

    protected $length; // number of bytes in this segment

    protected $offset; // file offset reached so far

    /**
     * @param swoole_process $process worker process used to report progress
     * @param Client         $client  Guzzle client
     * @param string         $url     download link
     * @param string         $file    path of the existing local target file
     * @param int            $start   offset of the first byte to fetch
     * @param int            $length  number of bytes to fetch
     */
    public function __construct(swoole_process $process, Client $client, $url, $file, $start, $length)
    {
        $this->process = $process;
        $this->client = $client;
        $this->url = $url;
        $this->file = $file;
        $this->start = $start;
        $this->length = $length;
    }

    /**
     * Fetch the segment and write it at its offset in the target file.
     *
     * Sends a ['type' => 'range', 'data' => <bytes>] message to the master after
     * every chunk written.
     *
     * @throws \RuntimeException when the target file cannot be opened or written,
     *                           or when the server does not honour the Range request
     */
    public function download()
    {
        $this->offset = $this->start;

        $res = fopen($this->file, 'rb+');
        if ($res === false) {
            throw new \RuntimeException('unable to open ' . $this->file . ' for writing');
        }

        $body = null;

        try {
            fseek($res, $this->start, SEEK_SET);

            // The last-byte position of an HTTP byte range is inclusive, hence "- 1";
            // without it every segment would ask for one byte too many.
            $lastByte = $this->start + $this->length - 1;

            $resp = $this->client->request('GET', $this->url, [
                'stream' => true,
                'headers' => [
                    'Range' => 'bytes=' . $this->start . '-' . $lastByte
                ]
            ]);
            $body = $resp->getBody();

            // A plain 200 means the server ignored the Range header and is sending the
            // file from byte 0; that is only usable for the segment that starts at 0.
            $status = $resp->getStatusCode();
            if ($status !== 206 && !($status === 200 && (int) $this->start === 0)) {
                throw new \RuntimeException('server did not honour the range request, status ' . $status);
            }

            $loaded = 0;

            // Stop on the byte count as well as on EOF: the original author observed
            // that eof() alone did not end the loop reliably.
            while ($loaded < $this->length && !$body->eof()) {
                // Read at most 5 KB, and never past the end of this segment.
                $chunk = $body->read(min(1024 * 5, $this->length - $loaded));
                $bytes = strlen($chunk);
                if ($bytes === 0) {
                    continue;
                }

                if (fwrite($res, $chunk) !== $bytes) {
                    throw new \RuntimeException('short write to ' . $this->file);
                }

                $loaded += $bytes;
                $this->offset += $bytes;

                $this->process->write(serialize([
                    'type' => 'range',
                    'data' => $bytes
                ]));
            }
        } finally {
            // Release the connection and the file handle on success and on failure alike.
            if ($body !== null) {
                $body->close();
            }
            fclose($res);
        }

        dump($this->length / 1024 / 1024 . 'MB下载完成');
    }
}

// 把文件大小从字节转换为合适的单位

/**
 * Format a byte count with the largest unit that keeps the value at or below 1024.
 *
 * The value is divided by 1024 while it is greater than 1024 and a larger unit
 * exists, then rounded to two decimals. Values beyond the last unit stay
 * expressed in that unit instead of being divided again under the same label.
 *
 * @param int|float $size number of bytes
 *
 * @return string e.g. "512B", "1.5KB", "2TB"
 */
function humanSize($size)
{
    $units = ['B', 'KB', 'MB', 'GB', 'TB'];
    $last = count($units) - 1;
    $index = 0;

    // Bounding the index keeps the label in step with the number of divisions.
    while ($size > 1024 && $index < $last) {
        $size /= 1024;
        $index++;
    }

    return round($size, 2) . $units[$index];
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值