// node crawler (part 2) — fetch novel chapter details (node爬虫(二)—— 获取小说章节详情)
const http = require('http')
const fs = require('fs')
const cheerio = require('cheerio')
const iconv = require('iconv-lite')
// Seed URL: the first chapter page to start crawling from.
const url = 'http://www.biquge.com/91_91711/4720017.html'
// Running chapter counter; also used as the record id in the saved JSON.
let i = 1
// Total number of chapters to fetch before the crawl stops.
let num = 3
// Entry point: kick off a crawl starting at the given chapter URL.
function main (url) {
  startRequest(url);
}
// Fetch one chapter page, parse the title and body text, persist the chapter,
// then schedule the next chapter until `num` chapters have been crawled.
function startRequest (url) {
  http.get(url, res => {
    const chunks = []
    res.on('data', (chunk) => {
      chunks.push(chunk)
    })
    res.on('end', () => {
      // NOTE(review): page is decoded as utf-8; if the site actually serves
      // gbk/gb2312 (common for Chinese novel sites) change the charset here.
      const html1 = iconv.decode(Buffer.concat(chunks), 'utf-8')
      const $ = cheerio.load(html1, { decodeEntities: false })
      const title = $('.bookname h1').text()
      // Guard: #content may be missing on error pages — .html() returns null
      // there and calling .split on it would throw.
      const rawContent = $("#content").html() || ''
      // Paragraphs are separated by double <br> tags; clean each one up.
      const arr = rawContent.split('<br><br>').map(elem => trim(elem.toString()))
      const bookName = $(".con_top a").eq(1).text()
      const obj = {
        id: i,
        bookName: bookName,
        title: title,
        content: arr
      }
      // Third link in the bottom nav is "next chapter" (relative URL).
      const link = $(".bottem2 a").eq(2).attr('href')
      const nextLink = `http://www.biquge.com/${link}`
      saveContent(obj, nextLink)
      console.log(`第${i + 1}章:${nextLink}`)
      i++
      if (i <= num) {
        // Throttle: wait 1s between requests to be polite to the server.
        setTimeout(() => {
          main(nextLink)
        }, 1000)
      }
    })
  }).on('error', err => {
    // Without this handler a network failure raises an unhandled 'error'
    // event and crashes the whole process.
    console.error(`request failed for ${url}: ${err.message}`)
  })
}
// Append one chapter object (as a JSON fragment followed by a comma) to the
// book's chapter.json file, creating data/<bookName>/ on first use.
// `nextLink` is accepted for interface compatibility but not used here.
function saveContent (obj, nextLink) {
  console.log(`${i}--${obj.title}`)
  // { recursive: true } also creates the parent "data" directory and is a
  // no-op when the directory already exists, so no existsSync race is needed.
  fs.mkdirSync(`data/${obj.bookName}`, { recursive: true })
  // appendFile creates the file if it is missing. The original code fired an
  // async writeFile("") here without awaiting it, which could truncate the
  // file AFTER the first append had already landed.
  fs.appendFile(`./data/${obj.bookName}/chapter.json`, JSON.stringify(obj) + ",", 'utf-8', err => {
    if (err) throw err
  })
}
// Strip leading/trailing whitespace from a paragraph, then remove every
// leftover space character the scraped HTML uses for indentation.
function trim (str) {
  // String#trim strips the same leading/trailing \s runs as the original
  // anchored regex (both cover Unicode whitespace such as U+00A0).
  return str.trim().replace(/ /g, '')
}
// Kick off the crawl from the seed chapter URL.
main(url)