注意:运行前需先在数据库中建好对应的表和字段(本例为 demo 库中的 cof 表,包含 title、content 两个字段);程序必须在 CLI(命令行)模式下运行。
1.通过composer安装phpspider
composer require owner888/phpspider
2.直接上代码
<?php
// Crawl a Jianshu collection with phpspider and export each article's title
// and body into a MySQL table. Must be run from the command line (CLI).

// Resolve the Composer autoloader relative to this script instead of the
// filesystem root ('/vendor/...'), which only works if vendor/ lives at "/".
// NOTE(review): assumes this script sits next to the vendor/ directory — adjust if not.
require __DIR__ . '/vendor/autoload.php';

use phpspider\core\phpspider;

// phpspider configuration.
$configs = array(
    'name' => '简书',
    'log_show' => false,
    'tasknum' => 1,

    // Database connection settings.
    'db_config' => array(
        'host' => '127.0.0.1',
        'port' => 3306,
        'user' => 'root',
        'pass' => 'root',
        'name' => 'demo',
    ),

    // Export scraped fields to a database table.
    'export' => array(
        'type' => 'db',
        'table' => 'cof', // table name; needs columns matching the 'fields' names below
    ),

    // Domains the spider is allowed to crawl.
    'domains' => array(
        'jianshu',
        'www.jianshu.com',
    ),

    // Entry point(s) of the crawl.
    'scan_urls' => array(
        'https://www.jianshu.com/c/V2CqjW?utm_medium=index-collections&utm_source=desktop',
    ),

    // List (collection) page URL patterns.
    // The id segment is alphanumeric (the seed URL above uses "V2CqjW"), so it
    // is matched with \w+ rather than \d+, which would never match that id.
    'list_url_regexes' => array(
        'https://www.jianshu.com/c/\w+',
    ),

    // Content (article) page URL patterns; \w+ stands for the variable id part.
    // NOTE(review): article ids are presumably alphanumeric as well — confirm against live URLs.
    'content_url_regexes' => array(
        'https://www.jianshu.com/p/\w+',
    ),

    'max_try' => 5,

    // Fields to extract from each content page; each 'name' is a column in the export table.
    'fields' => array(
        array(
            'name' => "title",
            'selector' => "//h1[@class='title']",
            'required' => true,
        ),
        array(
            'name' => "content",
            'selector' => "//div[@class='show-content-free']",
            'required' => true,
        ),
    ),
);

$spider = new phpspider($configs);
$spider->start();