const axios = require('axios');
const cheerio = require('cheerio');
const qs = require('qs');
const koa = require('koa');
const request = require('request');//只能用这个模拟浏览器?
const app = new koa()
const fs = require('fs')
// Pre-configured axios client for the target site.
// NOTE(review): `api` is not referenced by any other code visible in this file.
const api = axios.create({
  baseURL: 'https://www.78pan.com/',
  // Abort any request that takes longer than 1000 ms.
  timeout: 1000,
  // `transformRequest` lets the request data be changed before it is sent.
  // It only applies to the 'PUT', 'POST' and 'PATCH' request methods, and the
  // functions in the array must return a string, an ArrayBuffer or a Stream.
  // This identity function replaces axios's default request transform, so the
  // data is sent exactly as the caller supplied it.
  transformRequest: [function (data) {
    return data;
  }],
  // `transformResponse` lets the response data be changed before it reaches
  // then/catch. This identity function replaces axios's default response
  // transform, so the response data is handed over unmodified.
  transformResponse: [function (data) {
    return data;
  }],
  // Custom headers sent with every request. The literal previously also
  // declared an empty `headers: {}` earlier on; that duplicate key was dead
  // code (the later key wins) and has been removed.
  headers: { 'X-Requested-With': 'XMLHttpRequest' },
  // `params` are the URL parameters sent with the request.
  // Must be a plain object or a URLSearchParams object.
  params: {
    // ID: 12345
  },
  // `paramsSerializer` is the function in charge of serializing `params`
  // (e.g. https://www.npmjs.com/package/qs, http://api.jquery.com/jquery.param/)
  paramsSerializer: function (params) {
    return qs.stringify(params);
  },
});
// Base address of the site being scraped.
// NOTE: this module-level name shadows the global `URL` constructor for the
// rest of the file, so `new URL(...)` cannot be used here.
const URL = 'https://www.78pan.com';

/**
 * Fetch a page of the site with `request` and resolve with its body as text.
 *
 * On a successful response a separator line and the JSON-encoded response
 * headers are appended to ./err.txt, and the body is echoed to the console.
 *
 * @param {string} [url=''] - Path relative to the site root (no leading
 *   slash). Defaults to the site root so that `gethtml()` no longer requests
 *   the literal path "undefined".
 * @returns {Promise<string>} Resolves with the response body. Rejects with the
 *   transport error reported by `request`, or with any error thrown while
 *   logging the headers or reading the body.
 */
function gethtml(url = '') {
  return new Promise((resolve, reject) => {
    request(`${URL}/${url}`, (err, res, body) => {
      // The error must be checked before `res` is touched: when the request
      // fails `res` is undefined, and reading `res.headers` would throw and
      // hide the real error.
      if (err) {
        reject(err);
        return;
      }
      try {
        fs.appendFileSync('./err.txt', '\n\r++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n\r');
        fs.appendFileSync('./err.txt', JSON.stringify(res.headers));
        console.log(body); // the body is available directly from the callback
        resolve(res.body.toString());
      } catch (failure) {
        // A throw inside this asynchronous callback would otherwise be an
        // uncaught exception that leaves the promise pending forever.
        reject(failure);
      }
    });
  });
}
// gethtml()
// .then(res=>{
// let $ = cheerio.load(res);//生成类似jquery
// // fs.appendFileSync('./res.txt', $('.logo').find('img').attr('src'))
// $('img').each(function() {
// console.log($(this).attr('src'))
// });
// fs.appendFileSync('./res.text',$('img'))
// fs.appendFileSync('./res.txt', '\n\r++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n\r')
// })
// .catch(err=>{
// })
// One-off scrape run at module load: download the 'plaza/99' page, append its
// raw HTML to ./res.txt, then append the href of every <a> element, one per
// entry.
gethtml('plaza/99')
  .then((html) => {
    fs.appendFileSync('./res.txt', html);
    fs.appendFileSync('./res.txt', '\n\r++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n\r');
    const $ = cheerio.load(html); // jQuery-like handle over the document
    // fs.appendFileSync('./res.txt', $('.logo').find('img').attr('src'))
    $('a').each(function () {
      const href = $(this).attr('href');
      // Anchors without an href yield undefined, which fs.appendFileSync
      // rejects on current Node versions; skip them instead of aborting the
      // whole loop. Returning undefined lets `.each` continue.
      if (typeof href !== 'string') {
        return;
      }
      fs.appendFileSync('./res.txt', href);
      fs.appendFileSync('./res.txt', '\n\r');
    });
  })
  .catch((err) => {
    // Previously swallowed silently; report the failure so that a broken
    // scrape is visible.
    console.error(err);
  });
// Koa middleware: fetch the site's root page, append the HTML and its <title>
// to ./res.txt, and answer with the fetched HTML. If anything fails, the error
// is appended to ./err.txt and logged, and a fallback string is sent instead.
app.use(async ctx => {
  let html = null;
  try {
    // Pass the root path explicitly; calling gethtml() without an argument
    // requested the literal path "undefined".
    html = await gethtml('');
    fs.appendFileSync('./res.txt', html);
    fs.appendFileSync('./res.txt', '\n\r++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n\r');
    const $page = cheerio.load(html); // jQuery-like handle over the document
    const title = $page('title').html();
    console.log(title);
    // .html() yields null when the page has no <title>; appendFileSync only
    // accepts strings/buffers on current Node versions, so skip the write.
    if (typeof title === 'string') {
      fs.appendFileSync('./res.txt', title);
    }
  } catch (error) {
    // Stringify explicitly: passing the Error object itself makes
    // appendFileSync throw on current Node versions, which would escape this
    // handler and discard the original error.
    fs.appendFileSync('./err.txt', String(error));
    fs.appendFileSync('./err.txt', '\n\r++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n\r');
    console.log(error);
  }
  ctx.body = html || '嗯嗯';
});
// app.listen(3000, () => {
// });
{
"name": "nodespider",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"axios": "^0.18.0",
"cheerio": "^1.0.0-rc.2",
"koa": "^2.5.2",
"qs": "^6.5.2",
"request": "^2.87.0"
}
}