报错界面如下:
主要错误就是:在我的代码第三行,有一个require错误
require '../vendor/owner888/phpspider/autoloader.php';
原先我是这样写的,但是用命令行去跑就出错了。原因在于:在命令行(DOS)下,require 的相对路径是相对于当前工作目录解析的,而不是相对于脚本所在目录,所以引入 phpspider 的路径不对。
后来改成了下面这样,把目录定义成绝对路径,这样就没有问题了。
require __DIR__.'/../vendor/owner888/phpspider/autoloader.php';
这样子,就可以继续接着跑啦.
附带采集源码一份.
require __DIR__.'/../vendor/owner888/phpspider/autoloader.php';
require_once 'config.php';
use phpspider\core\phpspider;
use phpspider\core\db;
//下面这个注释删了就跑不起来.....
/* Do NOT delete this comment */
/* 不要删除这段注释 */
// Crawler configuration passed to the phpspider constructor.
// NOTE(review): $config (the database settings) is presumably defined by
// config.php, which is required at the top of this script - confirm.
$configs = [
    'name' => '特玩',
    'log_show' => true,
    'tasknum' => 3, // number of crawler tasks
    'output_encoding' => 'UTF-8',
    //'save_running_state' => true,
    // Main domains
    'domains' => [
        'www.te5.com',
        'te5.com',
    ],
    // Entry URLs
    'scan_urls' => [
        'http://www.te5.com/btgame/zkyx/', // discounted games
        'http://www.te5.com/btgame/h5/', // h5
    ],
    // List-page URL matching rules
    'list_url_regexes' => [
        // '/btshouyou/list_\d+_\d+\.html',
    ],
    // Detail-page URL matching rules.
    // The dot before "html" is escaped so it only matches a literal ".",
    // not an arbitrary character.
    'content_url_regexes' => [
        '/btshouyou/\d+\.html',
    ],
    'db_config' => $config,
    // Fields extracted from each detail page; each selector is an XPath expression.
    'fields' => [
        [
            'name' => "type_name", // type
            'selector' => "//div[@class='bread']/a[last()-1]",
            'required' => true,
        ],
        [
            'name' => "tags_name", // tags
            'selector' => "//div[@id='tezheng']/@data-tezheng",
            'required' => false,
        ],
        [
            'name' => "title", // title
            'selector' => "//div[@class='left']//h1",
            'required' => true,
        ],
        [
            'name' => "time", // date
            'selector' => "//ul[@class='i1']//li[i='日期:']/span",
            'required' => true,
        ],
        [
            'name' => "size", // size
            'selector' => "//ul[@class='i1']//li[i='大小:']/span",
            'required' => false,
        ],
        [
            'name' => "content", // content
            'selector' => "//div[@class='game_content']",
            'required' => true,
        ],
        [
            'name' => "and_url", // Android download URL
            'selector' => "//a[@class='and_btn']/@href",
            'required' => false,
        ],
        [
            'name' => "ios_url", // iOS download URL
            'selector' => "//a[@class='ios_btn']/@href",
            'required' => false,
        ],
        [
            'name' => "pc_url", // PC download URL
            'selector' => "//a[@class='pc_btn']/@href",
            'required' => false,
        ],
        [
            'name' => "img", // image
            'selector' => "//div[@class='img']//img/@src",
            'required' => false,
        ],
        [
            'name' => "keywords", // keywords
            'selector' => "//meta[@name='Keywords']/@content",
            'required' => false,
        ],
        [
            'name' => "description", // description
            'selector' => "//meta[@name='description']/@content",
            'required' => false,
        ],
        [
            'name' => "comments", // comments
            'selector' => "//div[@class='comments']",
            'required' => false,
        ],
    ],
];
// Instantiate the spider
$spider = new phpspider($configs);

// Crawl start => initialise the database
$spider->on_start = function ($phpspider) {
    // Register the "db_config" entry of the spider configuration as the
    // 'default' connection, then initialise the db layer.
    db::set_connect('default', $phpspider->get_config("db_config"));
    db::_init();
};
// Detail page => field extracted => post-process the field value
$spider->on_extract_field = function ($fieldname, $data, $page) {
    // Only the "content" and "comments" fields are transformed;
    // every other field is handed back untouched.
    if ($fieldname != 'content' && $fieldname != 'comments') {
        return $data;
    }

    // JSON-encode (non-ASCII characters left unescaped), zlib-compress,
    // then base64-encode the compressed bytes.
    return base64_encode(gzcompress(json_encode($data, JSON_UNESCAPED_UNICODE)));
};
// Detail page => all fields extracted => persist the record
//
// Builds a row for the yx_app table from the extracted fields ($data) and
// inserts it, unless the content is an empty string, the title contains a
// blocked keyword, or a row with the same app_name already exists.
// Always returns $data unchanged.
$spider->on_extract_page = function ($page, $data) {
    // Target column => extracted field name, in insertion order.
    $columnMap = [
        'app_name'    => 'title',
        'type_name'   => 'type_name',
        'tags_name'   => 'tags_name',
        'create_time' => 'time',
        'content'     => 'content',
        'comments'    => 'comments',
        'and_btn'     => 'and_url',
        'ios_btn'     => 'ios_url',
        'pc_btn'      => 'pc_url',
        'img'         => 'img',
        'keywords'    => 'keywords',
        'description' => 'description',
        'size'        => 'size',
    ];

    $savedata = ['cate_name' => 'BT'];
    foreach ($columnMap as $column => $field) {
        // Optional fields may be missing from $data; fall back to null
        // (the value a missing key would yield) without raising a notice.
        $savedata[$column] = isset($data[$field]) ? $data[$field] : null;
    }
    $savedata['status'] = 1; // status

    // Titles containing any of these keywords are not stored.
    $title = strip_tags((string) $savedata['app_name']);
    $has_kwds = false;
    foreach (['传奇', 'sf'] as $kwd) {
        if (strpos($title, $kwd) !== false) {
            $has_kwds = true;
            break;
        }
    }

    // Nothing to store when the content is empty or the title is blocked.
    if ($savedata['content'] === '' || $has_kwds) {
        return $data;
    }

    // The title comes from a scraped page, so escape it before embedding it
    // in the SQL string; an unescaped quote would break (or inject into) the query.
    // NOTE(review): addslashes() is only a stop-gap; prefer a parameterised
    // query if the db layer offers one.
    $sql = "SELECT id from yx_app WHERE app_name='" . addslashes((string) $savedata['app_name']) . "';";
    $row = db::get_one($sql);
    if (empty($row)) {
        db::insert("yx_app", $savedata); // save to the database
        echo "数据已入库";
    } else {
        echo '数据重复,已跳过。。。。。';
    }

    return $data;
};
// List page hook: dumps the page structure for debugging.
$spider->on_list_page = function ($page, $content, $phpspider) {
    // var_dump() writes its output immediately and returns null, so
    // concatenating it into an echo would print the dump before the label.
    // Emit the label first, then the dump.
    echo 'on_list_page:';
    var_dump($page);
};
// Start the spider now that the configuration and all callbacks are registered.
$spider->start();