Node.js 淘票票院线电影爬虫
环境
node.js v12.16.3
axios@0.20.0
cheerio@1.0.0-rc.3
npm安装
npm install axios cheerio
实现代码
const cherrio = require("cheerio");
const axios = require("axios");
const fs = require("fs");
// Fetch the Taopiaopiao show-list page, build one record per movie card
// (title, flattened info text, poster URL, detail-page URL), print the
// records and save them to disk as tab-indented JSON.
axios
  .get(`https://dianying.taobao.com/showList.htm?spm=a1z21.3046609.w2.3.32c0112aZmGlnh&n_s=new`)
  .then((response) => {
    const $ = cherrio.load(response.data);

    // Patterns are applied to each card's serialized HTML; built once, not per card.
    // None of them uses the /g flag, so reusing them across cards is stateless.
    const posterReg = /img width="160" height="224".*? src="(.*?)">/;
    const soonLinkReg = /<a href="(.*?)" class="movie-card-soon">/;
    const currentLinkReg = /<a href="(.*?)" class="movie-card">/;

    const movieList = [];

    $("div.movie-card-wrap").each((index, element) => {
      const card = $(element);
      // Serialize the card once and run every pattern against the same string.
      const cardHtml = String(card.html());

      const title = card.find(".bt-l").text();

      // Drop line breaks and tabs, then collapse each run of spaces into ";"
      // so the multi-line info list becomes a single delimited string.
      const information = card
        .find(".movie-card-list")
        .text()
        .replace(/\r\n|\n|\t/g, "")
        .replace(/ +/g, ";");

      // The literal string "null" (not the null value) marks a missing field.
      const posterMatch = posterReg.exec(cardHtml);
      const imgUrl = posterMatch !== null ? posterMatch[1] : "null";

      // Try the "movie-card-soon" link first, then fall back to the "movie-card" link.
      const linkMatch = soonLinkReg.exec(cardHtml) || currentLinkReg.exec(cardHtml);
      const detailUrl = linkMatch !== null ? linkMatch[1] : "null";

      movieList.push({
        title: title,
        information: information,
        image_url: imgUrl,
        detail_url: detailUrl,
      });
    });

    console.log(movieList);

    // NOTE(review): the output file name says "Maoyan" (and misspells "Movie")
    // although the data comes from Taopiaopiao; kept unchanged so existing
    // consumers of the file keep working.
    fs.writeFile("MaoyanMoive.json", JSON.stringify(movieList, null, "\t"), (err) => {
      if (err == null) {
        console.log("Successfully!");
      } else {
        console.error(err);
      }
    });
  })
  .catch((err) => {
    // Covers both a failed HTTP request and any exception thrown while parsing,
    // so the failure is reported instead of surfacing as an unhandled rejection.
    console.error(err);
  });
测试结果
输出 JSON 格式文件
最后
- 由于博主水平有限,不免有疏漏之处,欢迎读者随时批评指正,以免造成不必要的误解!