做了一个爬虫。由于PHP默认是单线程执行,爬取速度较慢,因此使用了pthreads扩展来实现多线程爬取。
pthreads扩展下载地址:http://windows.php.net/downloads/pecl/releases/pthreads
扩展文档:http://docs.php.net/manual/zh/book.pthreads.php
多线程代码
namespace app\api\controller\v1;
use think\Db; //此处的Db类都以失效,试了多种引入方式都不行
use think\Cache; //同理
use think\Controller;
/**
 * Worker thread that downloads the body of a single URL.
 *
 * Give it a URL, call start(), then join() and read $result.
 */
class Curl extends \Thread
{
    /** @var string URL this thread will request. */
    public $url;

    /** @var string|false|null Response body; false if the request failed, null if nothing was fetched. */
    public $result;

    /**
     * @param string $url Address to fetch once the thread runs.
     */
    public function __construct($url)
    {
        $this->url = $url;
    }

    /**
     * Thread entry point: fetch the configured URL and keep the response in $result.
     *
     * Does nothing when the URL is empty (falsy).
     */
    public function run()
    {
        if (!$this->url) {
            return;
        }

        $this->result = $this->doshu($this->url);
    }

    /**
     * Fetch the given URL with file_get_contents().
     *
     * @param string $url Address to request.
     * @return string|false Response body, or false when the request fails.
     */
    public function doshu($url)
    {
        $body = file_get_contents($url);

        return $body;
    }
}
遇到的问题:在线程中无法使用数据库,暂时不清楚原因,知道的朋友欢迎解释一下。目前的变通做法是:让线程去访问本地的内部URL,由这些接口来完成数据写入。
/**
 * Request several local endpoints in parallel, one Curl thread per URL,
 * and dump each response once its thread has finished.
 *
 * @return void
 */
public function doZhiHu1(){
    // Local endpoints to request concurrently.
    $urls = array('http://localhost/shopapi/api/v1.index/doZhiHu2',
        'http://localhost/shopapi/api/v1.index/doZhiHu3',
        'http://localhost/shopapi/api/v1.index/doZhiHu4',
        'http://localhost/shopapi/api/v1.index/doZhiHu5',
        'http://localhost/shopapi/api/v1.index/doZhiHu6');

    // Initialise explicitly so the collection loop never reads an undefined variable.
    $workers = array();

    // Start every thread first so the requests overlap.
    foreach ($urls as $key => $url) {
        // Use the declared class-name casing so case-sensitive autoloaders can resolve it.
        $workers[$key] = new Curl($url);
        $workers[$key]->start();
    }

    // Collect results in start order. join() blocks until the thread has
    // finished, so polling isRunning() in a sleep loop beforehand is unnecessary.
    foreach ($workers as $worker) {
        if ($worker->join()) {
            var_dump($worker->result);
        }
    }
}