const https = require("https");
const cheerio = require("cheerio");
const fs = require('fs');
// Listing page to scrape: the "web" channel of CSDN's navigation feed.
const url = "https://www.csdn.net/nav/web";

/**
 * Downloads the listing page, extracts the title and summary of every
 * `.list_con` entry found under `#feedlist_id`, and appends the collected
 * entries to ./csdn.txt as a JSON array.
 *
 * Failures (network error, non-2xx response, file write error) are reported
 * on stderr instead of crashing the process or being reported as success.
 */
https.get(url, (res) => {
  // A non-2xx response carries no article list; drain the body and stop.
  if (res.statusCode < 200 || res.statusCode >= 300) {
    console.error(`Request failed with status code ${res.statusCode}`);
    res.resume();
    return;
  }

  // Collect raw Buffers and decode once at the end, so that multi-byte
  // UTF-8 characters split across chunk boundaries are decoded correctly.
  const chunks = [];
  res.on('data', (chunk) => {
    chunks.push(chunk);
  });

  res.on('error', (err) => {
    console.error(err);
  });

  res.on('end', () => {
    console.log('数据包传输完毕');
    const html = Buffer.concat(chunks).toString();
    const $ = cheerio.load(html);
    const result = [];

    // Work on the element handed to the callback rather than re-selecting
    // `.list_con` document-wide by index: the global selection may contain
    // entries outside `#feedlist_id`, which would shift the indices.
    $("#feedlist_id").find(".list_con").each((index, element) => {
      const item = $(element);
      result.push({
        name: item.children(".title").children("h2").children("a").text().trim(),
        info: item.children(".summary").text().trim(),
      });
    });

    // Flag 'a' appends, so each run adds one more JSON array to the file.
    fs.writeFile('./csdn.txt', JSON.stringify(result), { 'flag': 'a' }, (err) => {
      if (err) {
        console.error(err);
        return;
      }
      console.log('写入成功');
    });
  });
}).on('error', (err) => {
  // Without this listener a DNS/connection failure would be thrown as an
  // unhandled 'error' event and terminate the process.
  console.error(err);
});
// A small web crawler based on Node.js
// (Source page note: latest recommended article published on 2024-09-21 20:39:11)