Node.js 爬取数据
![在这里插入图片描述](https://img-blog.csdnimg.cn/c90d2932d03d480988eb5558435fffec.png#pic_center)
// Column definitions for the "Book" model. Every column is stored as a string;
// only the cover image URL may be left empty.
const bookAttributes = {
  name: { type: DataTypes.STRING, allowNull: false },
  imgurl: { type: DataTypes.STRING },
  publishDate: { type: DataTypes.STRING, allowNull: false },
  author: { type: DataTypes.STRING, allowNull: false },
};

// Model-level options handed to sequelize.define().
const bookOptions = {
  // Sequelize option: use the model name as the table name instead of a pluralized form.
  freezeTableName: true,
  // Sequelize option: soft-delete rows (mark them deleted) instead of removing them.
  paranoid: true,
};

const Book = sequelize.define("Book", bookAttributes, bookOptions);

module.exports = Book;
const axios = require("axios").default;
const cheerio = require("cheerio");
const Book = require("../models/Book");
const sequelize = require("./db");
const { DataTypes } = require("sequelize");
/**
 * Request a single page and hand back the body of the response.
 *
 * @param {string} url - Address of the page to request.
 * @returns {Promise<*>} Resolves with the `data` field of the axios response.
 */
async function getBookDetailHTML(url) {
  const { data: pageBody } = await axios.get(url);
  return pageBody;
}
/**
 * Request the Douban "latest books" listing page.
 *
 * @returns {Promise<*>} Resolves with the `data` field of the axios response.
 */
async function getBookHTML() {
  const listingUrl = "https://book.douban.com/latest";
  const { data: listingBody } = await axios.get(listingUrl);
  return listingBody;
}
/**
 * Collect the `href` of every cover-image anchor on the listing page.
 *
 * @returns {Promise<string[]>} Resolves with the hrefs in document order.
 */
async function getBookLinks() {
  const $ = cheerio.load(await getBookHTML());
  const anchors = $(".article .chart-dashed-list .media .media__img a");
  // Stay on cheerio's own .map(): .get() then converts the result to a plain array.
  return anchors.map((_, anchor) => anchor.attribs.href).get();
}
/**
 * Visit every book link from the listing page and extract basic details
 * from each detail page.
 *
 * Detail pages are requested one at a time, in the order returned by
 * `getBookLinks()`. A failed request still rejects the returned promise.
 *
 * A field that cannot be located on a page does not throw:
 *   - no cover image        -> `imgurl` is `undefined`
 *   - no "出版年" text node  -> `publishDate` is `""`
 *
 * @returns {Promise<Array<{name: string, imgurl: (string|undefined), author: string, publishDate: string}>>}
 *   One record per link, in link order.
 */
async function getBooksDetail() {
  const datas = [];
  const links = await getBookLinks();

  for (const link of links) {
    const html = await getBookDetailHTML(link);
    const $ = cheerio.load(html);
    const labels = $("#info span.pl");

    // Narrow the "#info span.pl" label spans to those whose text contains `keyword`.
    const findLabel = (keyword) =>
      labels.filter((_, elem) => $(elem).text().includes(keyword));

    const name = $("#wrapper h1 span").text();

    // .attr() reads from the first matched element and yields undefined when
    // nothing matched, so a page without a cover no longer throws.
    const imgurl = $("#mainpic a img").attr("src");

    const author = findLabel("作者").next("a").text();

    // The publication year is read from the node right after its label span.
    // Guard every hop: the label, its sibling, or the sibling's text may be absent.
    const publishLabel = findLabel("出版年")[0];
    const publishNode = publishLabel ? publishLabel.nextSibling : null;
    const publishText = publishNode ? publishNode.nodeValue : null;
    // Fall back to "" (not null) so the value stays a string, as name and author do.
    const publishDate = typeof publishText === "string" ? publishText.trim() : "";

    datas.push({ name, imgurl, author, publishDate });
  }

  return datas;
}
/**
 * Crawl the book details and pass them to `Book.bulkCreate` in a single call,
 * then log whatever `bulkCreate` resolved with.
 *
 * @returns {Promise<void>} Rejects if crawling or the bulk insert fails.
 */
async function save() {
  const datas = await getBooksDetail();
  const res = await Book.bulkCreate(datas);
  console.log("res", res);
}

// Script entry point. The promise must not be left floating: report any
// crawl/insert failure and mark the process as failed instead of relying on
// the runtime's unhandled-rejection behavior.
save().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});