//引入模块
const http = require('http')
const fs = require('fs')
const cheerio = require('cheerio')
const iconv = require('iconv-lite')
const request = require('request')
const async = require('async');
// Addresses of the listing pages to crawl
const urlList = []
// Running counter of image downloads started so far
let id = 0
// Every listing page shares one URL pattern, so each address is just the
// common prefix followed by the page number (193 down to 191).
for (let page = 193; page > 190; page -= 1) {
  urlList.push(`http://jandan.net/ooxx/page-${page}`)
}
/**
 * Download one listing page and collect the image links found on it.
 *
 * @param {string} url - Address of the page to fetch.
 * @param {function(?Error, Array=)} callback - Node-style callback. On success it
 *   receives the `href` value of every `.view_img_link` element on the page; if
 *   the request or the response stream emits an error it receives that error.
 */
function getPages(url, callback) {
  // Call-once guard: an error may be followed by further stream events, and the
  // callback must not be invoked a second time.
  let settled = false
  const settle = (err, links) => {
    if (settled) {
      return
    }
    settled = true
    callback(err, links)
  }

  http.get(url, res => {
    const chunks = []
    res.on('data', (chunk) => {
      chunks.push(chunk)
    })
    res.on('error', settle)
    res.on('end', () => {
      // The body is decoded as utf8; a GBK-encoded site would need its own
      // charset passed here, otherwise the text comes out garbled.
      const html = iconv.decode(Buffer.concat(chunks), 'utf8')
      // cheerio parses the downloaded page and exposes a jQuery-like API
      const $ = cheerio.load(html, {
        decodeEntities: false
      })
      const link = []
      $('.view_img_link').each((i, v) => {
        link.push($(v).attr('href'))
      })
      settle(null, link)
    })
  }).on('error', settle) // without this listener a connection failure is thrown as an unhandled 'error' event
}
/**
 * Download every collected image into `images/jiandan`, five at a time.
 *
 * @param {Array<Array<string>>} links - One array of image links per crawled
 *   page. Missing page entries and missing links are skipped.
 */
function saveImage(links) {
  // `recursive` also creates the parent `images` directory and is a no-op when
  // the target already exists, so no separate existence check is needed.
  fs.mkdirSync('images/jiandan', { recursive: true })

  // `links` is two-dimensional (one array per page), so flatten it into a
  // single list before downloading.
  const allLinks = (links || [])
    .reduce((merged, pageLinks) => merged.concat(pageLinks || []), [])
    .filter(Boolean)
  console.log('length:' + allLinks.length)

  async.mapLimit(allLinks, 5, function (link, callback) {
    id++ // id is the running counter; it is printed once the image has been saved
    requestAndwrite(link, callback, id)
  }, function (err, results) {
    if (err) {
      console.log(err)
    } else {
      console.log(results)
    }
  })
}
/**
 * Download a single image and write it into `images/jiandan`, named after the
 * last path segment of the link.
 *
 * @param {string} link - Image address without a scheme; `http:` is prepended.
 * @param {function(?Error, string=)} callback - Node-style callback, invoked
 *   exactly once: with `'result'` after the file has been written, or with the
 *   error if the HEAD probe, the download, or the file write fails.
 * @param {number} id - Sequence number printed once the image has been saved.
 */
var requestAndwrite = function (link, callback, id) {
  const url = 'http:' + link

  // Call-once guard: a failing stream can emit both 'error' and 'close'.
  let finished = false
  const finish = (err, result) => {
    if (finished) {
      return
    }
    finished = true
    callback(err, result)
  }

  request.head(url, function (err) {
    if (err) {
      console.log(err)
      // Report the failure; otherwise the caller would wait on this download forever.
      finish(err)
      return
    }

    const target = fs.createWriteStream(`images/jiandan/${link.split('/').pop()}`)
    target.on('error', finish)
    target.on('close', function () {
      if (finished) {
        // Closed as a consequence of an error that was already reported.
        return
      }
      console.log(id)
      finish(null, 'result')
    })

    request(url)
      .on('error', function (downloadErr) {
        finish(downloadErr)
        target.destroy()
      })
      .pipe(target)
  })
}
// Entry point: fetch the listing pages (at most five at a time), then download
// the images they link to.
async.mapLimit(urlList, 5, function (url, callback) {
  getPages(url, callback)
}, function (err, links) {
  if (err) {
    // When a page fetch fails the collected `links` may be incomplete, so
    // report the error instead of handing them to saveImage.
    console.log(err)
    return
  }
  saveImage(links)
})