Node.js 淘票票院线电影爬虫

Node.js 淘票票院线电影爬虫

环境

node.js v12.16.3
axios@0.20.0
cheerio@1.0.0-rc.3

npm安装

npm install axios@0.20.0 cheerio@1.0.0-rc.3

实现代码

const cherrio = require("cheerio");
const axios = require("axios");
const fs = require("fs");

// Scrape the Taopiaopiao movie listing page: download the HTML, build one
// record per "div.movie-card-wrap" element, print the records and save them
// as tab-indented JSON.
//
// Each record has four string fields: title, information, image_url and
// detail_url. The two URL fields hold the literal string "null" when the
// corresponding pattern does not match the card's HTML.
axios.get(`https://dianying.taobao.com/showList.htm?spm=a1z21.3046609.w2.3.32c0112aZmGlnh&n_s=new`).then((response) => {
    const $ = cherrio.load(response.data);

    // Patterns applied to each card's serialized HTML. None of them is
    // global or sticky, so reusing them across cards carries no state.
    const posterPattern = /img width="160" height="224".*? src="(.*?)">/;
    const soonLinkPattern = /<a href="(.*?)" class="movie-card-soon">/;
    const currentLinkPattern = /<a href="(.*?)" class="movie-card">/;

    const movieList = [];

    $("div.movie-card-wrap").each((index, element) => {
        const card = $(element);
        // Serialize once; every pattern below runs against this same string.
        const cardHtml = String(card.html());

        const title = card.find(".bt-l").text();

        // Strip line breaks and tabs, then turn each run of spaces into ";".
        const information = card.find(".movie-card-list").text()
            .replace(/\r\n/g, "").replace(/\n/g, "").replace(/\t/g, "").replace(/\ +/g, ";");

        const posterMatch = posterPattern.exec(cardHtml);
        const imageUrl = posterMatch != null ? posterMatch[1] : "null";

        // Prefer the "movie-card-soon" link; fall back to the "movie-card" link.
        const detailMatch = soonLinkPattern.exec(cardHtml) || currentLinkPattern.exec(cardHtml);
        const detailUrl = detailMatch != null ? detailMatch[1] : "null";

        movieList.push({
            title: title,
            information: information,
            image_url: imageUrl,
            detail_url: detailUrl,
        });
    });

    console.log(movieList);

    // NOTE(review): the file name says "Maoyan" (and misspells "Movie") although
    // the data comes from Taopiaopiao; kept unchanged so existing consumers of
    // the output file keep working.
    fs.writeFile("MaoyanMoive.json", JSON.stringify(movieList, null, "\t"), (err) => {
        if (err == null) {
            console.log("Successfully!");
        } else {
            console.error(err);
        }
    });
}).catch((error) => {
    // A failed request or an exception thrown while parsing would otherwise
    // surface as an unhandled promise rejection.
    console.error(error);
    process.exitCode = 1;
});

测试结果

输出JSON格式文件

最后

  • 由于博主水平有限,不免有疏漏之处,欢迎读者随时批评指正,以免造成不必要的误解!
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值