Python爬虫进阶-同程旅游酒店评价(JS逆向)

目的

获取同程旅游酒店评价信息

详细需求

https://www.ly.com/HotelInfo-52003119.html?spm0=10002.2001.1.0.1.4.31

在这里插入图片描述

思路解析

一、F12
在这里插入图片描述

从这里已经可以看出数据的获取方式:请求链接--返回数据--提取数据。
接下来需要做的就是构建请求。

二、请求分析
在这里插入图片描述
三、js调试

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

在这里插入图片描述
在这里插入图片描述

至此,所有需要的参数都已经找到,接下来根据需要把相关JS代码改写到本地进行调试。

四、本地JS调试

/**
 * Build the request token: the lowercase hexadecimal MD5 digest of `e`.
 *
 * Strings are hashed as their UTF-8 bytes. Arrays and Uint8Arrays are treated
 * as raw bytes (each element masked to 8 bits). Any other value, such as a
 * numeric timestamp, is converted with String() first, so token(123) and
 * token("123") produce the same digest.
 *
 * Written in plain ES5 (var / function only) so it keeps working in whatever
 * JavaScript runtime loads this file.
 *
 * @param {string|number|Array<number>|Uint8Array} e - Message to hash.
 * @returns {string} 32-character lowercase hexadecimal digest.
 * @throws {TypeError} If `e` is null or undefined.
 * @throws {URIError} If a string contains a lone surrogate (raised by
 *     encodeURIComponent during UTF-8 conversion).
 */
function token(e) {
    if (e === null || e === undefined) {
        throw new TypeError("token: illegal argument " + e);
    }

    // Left-rotation amounts, one row per round (RFC 1321, section 3.4).
    var SHIFTS = [
        [7, 12, 17, 22],
        [5, 9, 14, 20],
        [4, 11, 16, 23],
        [6, 10, 15, 21]
    ];

    // Additive constants for the 64 steps, in step order, as signed 32-bit ints.
    var K = [
        -680876936, -389564586, 606105819, -1044525330,
        -176418897, 1200080426, -1473231341, -45705983,
        1770035416, -1958414417, -42063, -1990404162,
        1804603682, -40341101, -1502002290, 1236535329,
        -165796510, -1069501632, 643717713, -373897302,
        -701558691, 38016083, -660478335, -405537848,
        568446438, -1019803690, -187363961, 1163531501,
        -1444681467, -51403784, 1735328473, -1926607734,
        -378558, -2022574463, 1839030562, -35309556,
        -1530992060, 1272893353, -155497632, -1094730640,
        681279174, -358537222, -722521979, 76029189,
        -640364487, -421815835, 530742520, -995338651,
        -198630844, 1126891415, -1416354905, -57434055,
        1700485571, -1894986606, -1051523, -2054922799,
        1873313359, -30611744, -1560198380, 1309151649,
        -145523070, -1120210379, 718787259, -343485551
    ];

    // Convert the caller's message into an array of byte values (0-255).
    function toBytes(message) {
        var out = [];
        var k;
        var isByteArray = Array.isArray(message) ||
            (typeof Uint8Array !== "undefined" && message instanceof Uint8Array);
        if (isByteArray) {
            for (k = 0; k < message.length; k++) {
                out.push(message[k] & 255);
            }
            return out;
        }
        // unescape(encodeURIComponent(s)) yields one char per UTF-8 byte.
        var binary = unescape(encodeURIComponent(String(message)));
        for (k = 0; k < binary.length; k++) {
            out.push(binary.charCodeAt(k) & 255);
        }
        return out;
    }

    var bytes = toBytes(e);
    var bitLength = bytes.length * 8;

    // Total 32-bit words after padding: whole 512-bit blocks with room for the
    // 0x80 marker and the 64-bit length field.
    var wordCount = (((bitLength + 64) >>> 9) << 4) + 16;
    var words = [];
    var i;
    for (i = 0; i < wordCount; i++) {
        words.push(0);
    }

    // Pack bytes into little-endian words.
    for (i = 0; i < bytes.length; i++) {
        words[i >>> 2] |= bytes[i] << ((i % 4) * 8);
    }

    // Padding: a single 1 bit right after the message, then the bit length in
    // the second-to-last word (the high length word stays 0).
    words[bitLength >>> 5] |= 0x80 << (bitLength % 32);
    words[wordCount - 2] = bitLength;

    var a0 = 1732584193;
    var b0 = -271733879;
    var c0 = -1732584194;
    var d0 = 271733878;

    for (var offset = 0; offset < wordCount; offset += 16) {
        var a = a0;
        var b = b0;
        var c = c0;
        var d = d0;

        for (i = 0; i < 64; i++) {
            var round = i >>> 4;
            var mix;
            var index;
            if (round === 0) {
                mix = (b & c) | (~b & d);
                index = i;
            } else if (round === 1) {
                mix = (b & d) | (c & ~d);
                index = (5 * i + 1) % 16;
            } else if (round === 2) {
                mix = b ^ c ^ d;
                index = (3 * i + 5) % 16;
            } else {
                mix = c ^ (b | ~d);
                index = (7 * i) % 16;
            }

            // The sum stays far below 2^53, so it is exact; the shifts below
            // reduce it modulo 2^32.
            var sum = a + mix + K[i] + words[offset + index];
            var shift = SHIFTS[round][i % 4];

            a = d;
            d = c;
            c = b;
            b = (b + ((sum << shift) | (sum >>> (32 - shift)))) | 0;
        }

        a0 = (a0 + a) | 0;
        b0 = (b0 + b) | 0;
        c0 = (c0 + c) | 0;
        d0 = (d0 + d) | 0;
    }

    // Serialise the four state words as little-endian bytes in hex.
    var state = [a0, b0, c0, d0];
    var hex = "";
    for (i = 0; i < 16; i++) {
        var octet = (state[i >>> 2] >>> ((i % 4) * 8)) & 255;
        hex += (octet >>> 4).toString(16) + (octet & 15).toString(16);
    }
    return hex;
}

// Demo: hash the current millisecond timestamp and print the resulting token.
// Declared with `var` so `e` is not an implicit global, which would throw a
// ReferenceError under strict mode or in an ES module.
var e = String(new Date().getTime());

console.log(token(e));

效果实现

在PyCharm中直接运行JS代码需要Node.js环境支持;若尚未安装,请先安装Node.js(可自行搜索PyCharm安装Node.js的教程)

在这里插入图片描述
五、思路汇总

1.获取token值
2.构建请求链接
3.提取评价数据

源码实现

import time
import requests
import execjs

e = int(time.time() * 1000)  # current time as a millisecond timestamp

# Load the local JS file that defines token().
with open('tongchenglvxing.js', 'r', encoding='utf-8') as f:
    ctx = execjs.compile(f.read())

token = ctx.call('token', str(e))  # call the JS function to generate the token

# Target hotel and review page. The hotel id feeds both the query string and
# the Referer header, so changing it here keeps the two consistent.
hotel_id = '52003119'
page = 2

# Review-list endpoint; the query string is passed separately via `params`.
url = "https://www.ly.com/hotel/api/tmapi/comment/list/"
params = {
    "hotelid": hotel_id,
    "page": page,
    "pageSize": 10,
    "commentType": 0,
    "roomTypeId": "",
    "tripPurposeId": "",
    "RankType": 1,
    "mainTagId": "",
    "subTagId": "",
    "antitoken": token,
}

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36",
    "Referer": "https://www.ly.com/HotelInfo-{}.html?spm0=10002.2001.1.0.1.4.31".format(hotel_id),
    # Only the first fragment is passed through str.format (for the `wangba`
    # timestamp); the second fragment contains literal braces and must not be.
    "Cookie": 'Hm_lvt_64941895c0a12a3bdeb5b07863a52466=1602295824; Hm_lpvt_64941895c0a12a3bdeb5b07863a52466=1602295824; 17uCNRefId=RefId=6928722&SEFrom=baidu&SEKeyWords=; CNSEInfo=RefId=6928722&tcbdkeyid=&SEFrom=baidu&SEKeyWords=&RefUrl=https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DuFsbm_4VERms_CCVvyn5vR74d_j8_i5hmKmmvPhj_5S%26wd%3D%26eqid%3D83e1960b000aba54000000065f811809; TicketSEInfo=RefId=6928722&SEFrom=baidu&SEKeyWords=; __tctmu=144323752.0.0; __tctmz=144323752.1602295823130.1.1.utmccn=(organic)|utmcmd=organic|utmEsl=gb2312|utmcsr=baidu|utmctr=; longKey=1602295823689244; __tctrack=0; qdid=-9999; Hm_lvt_c6a93e2a75a5b1ef9fb5d4553a2226e5=1602295833; Hm_lvt_f97c1b2277f4163d4974e7b5c8aa1e96=1602295834; wangba={}'.format(
        e) + '; firsttime=1602295835354; sug_act_info=; __tctmc=144323752.9037491; trace_token=; __tctmd=144323752.737325; __tccgd=144323752.0; route=95e291ab3c2e10cfb4323494ebf2a6bd; Hm_lpvt_c6a93e2a75a5b1ef9fb5d4553a2226e5=1602310138; Hm_lpvt_f97c1b2277f4163d4974e7b5c8aa1e96=1602310139; User-Ref-SessionId=fc5c-14fa-7bf9-14e9-015a-1d1e; trace_extend={"deviceid":"1602295823689244","appid":"1","userid":"1602295823689244","orderfromid":"57000","sessionid":"fc5c-14fa-7bf9-14e9-015a-1d1e","pvid":"4cea3129"}; __tctma=144323752.1602295823689244.1602295823130.1602305312867.1602310135480.5; __tctmb=144323752.881068342023288.1602310135480.1602310135480.1; lasttime=1602310899814'

}

# NOTE(review): the original passed verify=False, which disables TLS
# certificate checking. Verification is now left at the requests default;
# restore the flag only if a trusted intercepting proxy requires it.
# The timeout stops the script from hanging on an unresponsive server.
resp = requests.get(url, params=params, headers=headers, timeout=10)
resp.raise_for_status()  # surface HTTP errors instead of parsing an error page
response = resp.json()  # parse the JSON payload

# Walk response -> body -> dpList defensively: a rejected token or changed
# payload yields an empty list instead of an AttributeError on None.
res = ((response.get('response') or {}).get('body') or {}).get('dpList') or []
for i in res:
    print(i.get('dpContent'))

效果实现
在这里插入图片描述

参考文章
作者:不吃夹生饭
https://zhuanlan.zhihu.com/p/54627024
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

jia666666

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值