-
@Last Modified by: user
-
@Last Modified time: 2018-04-30 22:02:59
*/
// Module dependencies: Node core modules plus the third-party `request` package.
// String literals must use ASCII quotes; typographic quotes are a syntax error.
var https = require('https');
var http = require('http');
var fs = require('fs');
var request = require('request');
// Page index to start crawling from.
let startPage = 0;
// Page counter; initialised to the start page and advanced as pages are fetched.
let page = startPage;
// Page index to crawl up to.
let endPage = 1;
// Initial request URL; the offset query parameter is the start page times 20.
var url = `https://www.toutiao.com/search_content/?offset=${startPage * 20}&format=json&keyword=%E8%A1%97%E6%8B%8D&autoload=true&count=20&cur_tab=3&from=gallery`;
var i = 0;
// Used to decide whether to store or to visit.
var temp = 0;
// Collected page entries (title + url). Declared explicitly so the script does
// not rely on an implicit global, which throws in strict / ES-module code.
var urlList = [];
// Thin wrapper: schedules startRequest for the given address after a 2000 ms delay.
function fetchPage(x) {
  const delayMs = 2000;
  const launch = () => {
    startRequest(x);
  };
  setTimeout(launch, delayMs);
}
//首先存储要访问界面的url
function getUrl(x){
temp++;
https.get(x,function(res){
var html = ‘’;
res.setEncoding(‘binary’);
res.on(‘data’, function (chunk) {
html += chunk;
});
res.on(‘end’, function () {
html = JSON.parse(html);//由于获取到的数据是JSON格式的,所以需要JSON.parse方法浅解析
for(let i of html.data){
var obj1={title:i.title,url:i.article_url};
urlList.push(obj1)
}
page++;
if(page<=endPage){
let tempUrl=