主文件
<?php
/**
 * Synchronous crawler entry point.
 *
 * Builds a Crawler for a fixed start page, lets it fetch that page and every
 * link found on it one after another, then prints the elapsed wall-clock time.
 */
require_once 'Crawler.php';

// microtime(true) returns the current Unix timestamp as a float, in seconds.
$startedAt = microtime(true);

$crawler = new Crawler('http://www.swoole.com/');
$crawler->visitOneDegree();

// Elapsed time in seconds; printed without a trailing newline.
echo 'time used: ', microtime(true) - $startedAt;
爬虫类
<?php
/**
 * Class Crawler
 * Path: /Sync/Crawler.php
 *
 * Synchronous, one-level-deep crawler: it downloads a start page, collects
 * the distinct links found in its body, and then downloads each of those
 * links sequentially (blocking on every request).
 */
class Crawler
{
    /** @var string Address of the start page. */
    private $url;

    /** @var string[] Distinct links found on the start page, in discovery order. */
    private $toVisit = [];

    /**
     * @param string $url Address of the page whose links will be crawled.
     */
    public function __construct($url)
    {
        $this->url = $url;
    }

    /**
     * Fetches the start page, then fetches every link discovered on it.
     *
     * @return void
     */
    public function visitOneDegree()
    {
        $this->loadPageUrls();
        $this->visitAll();
    }

    /**
     * Downloads the start page and queues each distinct link it contains.
     *
     * Every newly queued link is echoed followed by "<br/>". If the start
     * page cannot be fetched or contains no links, nothing is queued.
     *
     * @return void
     */
    private function loadPageUrls()
    {
        $content = $this->visit($this->url);
        if ($content === false || $content === '') {
            // Fetch failed or the page is empty: there is nothing to scan.
            return;
        }

        // Group 1 is the bare URL; group 4 is the delimiter that terminates it.
        // "https?" is required because "(http|ftp)://" can never match an
        // https:// link.
        // NOTE(review): ':' is in the terminator set, so a URL with an explicit
        // port is cut just before the port.
        $pattern = '#((https?|ftp)://(\S*?\.\S*?))([\s)\[\]{},;"\':<]|\.\s|$)#i';
        if (!preg_match_all($pattern, $content, $matched)) {
            // 0 matches, or false on a PCRE error.
            return;
        }

        // URL => index lookup gives constant-time, exact-match duplicate checks.
        $seen = array_flip($this->toVisit);

        // Iterate group 1, not group 0: the full match carries the trailing
        // delimiter (e.g. a closing quote), which would corrupt the URL.
        foreach ($matched[1] as $url) {
            if (isset($seen[$url])) {
                continue;
            }
            $seen[$url] = true;
            echo $url;
            echo "<br/>";
            $this->toVisit[] = $url;
        }
    }

    /**
     * Fetches every queued link in order; the responses are discarded.
     *
     * @return void
     */
    private function visitAll()
    {
        foreach ($this->toVisit as $url) {
            $this->visit($url);
        }
    }

    /**
     * Reads the content at the given address.
     *
     * Warnings are suppressed on purpose so that one unreachable link does
     * not interrupt the crawl; callers must check for false.
     *
     * @param string $url Address to read.
     * @return string|false The content, or false when the read fails.
     */
    private function visit($url)
    {
        return @file_get_contents($url);
    }
}
爬取结果
http://wiki.swoole.com/"
http://compiler.swoole.com/"
http://wiki.swoole.com/wiki/page/prid-1-p-project/change_log.html"
http://group.swoole.com"
http://wiki.swoole.com/wiki/page/prid-1-p-author.html"
http://wiki.swoole.com/wiki/page/p-case.html"
http://wiki.swoole.com/wiki/page/p-donate.html"
http://127.0.0.1:
time used: 275.22750902176
参考地址
https://www.jianshu.com/p/8035266e8f42
https://www.w3cschool.cn/swoole/swoole-functions.html
https://wiki.swoole.com/wiki/page/245.html