axios和cheerio
例子,对豆瓣推荐的书籍进行数据抓取,url:https://book.douban.com/latest
const axios = require('axios').default;
const cheerio = require('cheerio');
const Boook = require('../models/Book');
/**
 * Download the Douban "latest books" page.
 *
 * @returns {Promise<*>} The `data` field of the axios response for
 *   https://book.douban.com/latest (the page markup).
 */
async function getBooksHTML() {
  const { data: pageMarkup } = await axios.get("https://book.douban.com/latest");
  return pageMarkup;
}
/**
 * Collect the detail-page URLs of the books shown on the listing page.
 *
 * Loads the markup returned by getBooksHTML() into cheerio, selects the
 * cover anchors and reads each anchor's `href` attribute.
 *
 * @returns {Promise<string[]>} The `href` values of the matched anchors.
 */
async function getBooksList() {
  const $ = cheerio.load(await getBooksHTML());
  return $('#content .grid-12-12 li a.cover')
    .map((_index, anchor) => anchor.attribs.href)
    .get();
}
// getBooksList().then(resp => {
// console.log(resp);
// });
/**
 * Fetch one book's detail page and extract its basic metadata.
 *
 * The author and publication date are located through the label spans in
 * `#info span.pl`: the author is the text of the `<a>` element directly after
 * the label containing "作者" (author), and the publication date is the text
 * node directly after the label containing "出版年" (publication year).
 *
 * @param {string} detailUrl - URL of the book's detail page.
 * @returns {Promise<{author: string, name: string, imgurl: (string|undefined), publishDate: (string|null)}>}
 *   The extracted fields. `publishDate` is `null` when the page has no
 *   publication-year label or the label is not followed by a text node.
 */
async function getBookDetail(detailUrl) {
  const resp = await axios(detailUrl);
  const $ = cheerio.load(resp.data);
  const labelSpans = $("#info span.pl");

  // Narrow the label spans down to those whose text contains `keyword`.
  const findLabel = (keyword) =>
    labelSpans.filter((i, ele) => $(ele).text().includes(keyword));

  const name = $("h1").text().trim();
  const imgurl = $('#mainpic .nbg img').attr('src');
  const author = findLabel("作者").next("a").text();

  // Not every page is guaranteed to carry a publication-year label, and the
  // node after it may not be a text node. Degrade to null instead of throwing
  // a TypeError, which would reject the whole batch in callers that use
  // Promise.all over many detail pages.
  const publishLabel = findLabel("出版年")[0];
  const dateNode = publishLabel ? publishLabel.nextSibling : null;
  const rawDate = dateNode ? dateNode.nodeValue : null;
  const publishDate = typeof rawDate === "string" ? rawDate.trim() : null;

  return {
    author,
    name,
    imgurl,
    publishDate,
  };
}
/**
 * Fetch the details of every book found on the listing page.
 *
 * All detail requests are started before any of them is awaited, and the
 * results are returned in the same order as the links from getBooksList().
 *
 * @returns {Promise<Object[]>} One getBookDetail() result per link; rejects
 *   if any single detail request rejects.
 */
async function fetchAllBooks() {
  const detailUrls = await getBooksList();
  const pendingDetails = [];
  for (const detailUrl of detailUrls) {
    pendingDetails.push(getBookDetail(detailUrl));
  }
  return Promise.all(pendingDetails);
}
/**
 * Scrape all books and store them in the database.
 *
 * Passes the result of fetchAllBooks() to `Boook.bulkCreate()` (presumably an
 * ORM model's bulk insert - confirm against ../models/Book) and logs a
 * confirmation message once that call has completed.
 *
 * @returns {Promise<void>} Rejects if scraping or the bulk insert fails.
 */
async function saveToDB() {
  const books = await fetchAllBooks();
  // Await the insert so the success message is only printed after the write
  // has actually finished, and so a failed write rejects this function
  // instead of surfacing as an unhandled rejection.
  await Boook.bulkCreate(books);
  console.log('书籍信息更新到数据库');
}
// Script entry point: run the scrape-and-save job. The returned promise is
// handled explicitly so a failure is reported and reflected in the exit code
// instead of being left as a floating, unhandled rejection.
saveToDB().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Navicat book表: