目的
获取同程旅游酒店评价信息
详细需求
https://www.ly.com/HotelInfo-52003119.html?spm0=10002.2001.1.0.1.4.31
思路解析
一、F12
从这里已经可以知道数据的获取流程:请求链接 → 返回数据 → 提取数据,
因此这里需要做的就是构建请求。
二、请求分析
三、js调试
到此,所有需要的参数都已经找到了,接下来根据需要把相关 JS 代码改写到本地进行调试。
四、本地JS调试
function token(e) {
var a56 = {
utf8: {
stringToBytes: function(e) {
return a56.bin.stringToBytes(unescape(encodeURIComponent(e)))
},
bytesToString: function(e) {
return decodeURIComponent(escape(a.bin.bytesToString(e)))
}
},
bin: {
stringToBytes: function(e) {
for (var t = [], a = 0; a < e.length; a++)
t.push(255 & e.charCodeAt(a));
return t
},
bytesToString: function(e) {
for (var t = [], a = 0; a < e.length; a++)
t.push(String.fromCharCode(e[a]));
return t.join("")
}
}
};
// 这里t取任意值都行
// var t = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
var t = null;
var n, i, o, s, r;
// n = a117,
n = {
rotl: function(e, t) {
return e << t | e >>> 32 - t
},
rotr: function(e, t) {
return e << 32 - t | e >>> t
},
endian: function(e) {
if (e.constructor == Number)
return 16711935 & n.rotl(e, 8) | 4278255360 & n.rotl(e, 24);
for (var t = 0; t < e.length; t++)
e[t] = n.endian(e[t]);
return e
},
randomBytes: function(e) {
for (var t = []; e > 0; e--)
t.push(Math.floor(256 * Math.random()));
return t
},
bytesToWords: function(e) {
for (var t = [], a = 0, n = 0; a < e.length; a++,
n += 8)
t[n >>> 5] |= e[a] << 24 - n % 32;
return t
},
wordsToBytes: function(e) {
for (var t = [], a = 0; a < 32 * e.length; a += 8)
t.push(e[a >>> 5] >>> 24 - a % 32 & 255);
return t
},
bytesToHex: function(e) {
for (var t = [], a = 0; a < e.length; a++)
t.push((e[a] >>> 4).toString(16)),
t.push((15 & e[a]).toString(16));
return t.join("")
},
hexToBytes: function(e) {
for (var t = [], a = 0; a < e.length; a += 2)
t.push(parseInt(e.substr(a, 2), 16));
return t
},
bytesToBase64: function(e) {
for (var t = [], n = 0; n < e.length; n += 3)
for (var i = e[n] << 16 | e[n + 1] << 8 | e[n + 2], o = 0; o < 4; o++)
8 * n + 6 * o <= 8 * e.length ? t.push(a.charAt(i >>> 6 * (3 - o) & 63)) : t.push("=");
return t.join("")
},
base64ToBytes: function(e) {
e = e.replace(/[^A-Z0-9+\/]/gi, "");
for (var t = [], n = 0, i = 0; n < e.length; i = ++n % 4)
0 != i && t.push((a.indexOf(e.charAt(n - 1)) & Math.pow(2, -2 * i + 8) - 1) << 2 * i | a.indexOf(e.charAt(n)) >>> 6 - 2 * i);
return t
}
},
i = a56.utf8,
o = null,
s = a56.bin,
(r = function(e, t) {
e.constructor == String ? e = t && "binary" === t.encoding ? s.stringToBytes(e) : i.stringToBytes(e) : o(e) ? e = Array.prototype.slice.call(e, 0) : Array.isArray(e) || (e = e.toString());
for (var a = n.bytesToWords(e), l = 8 * e.length, c = 1732584193, d = -271733879, p = -1732584194, u = 271733878, m = 0; m < a.length; m++)
a[m] = 16711935 & (a[m] << 8 | a[m] >>> 24) | 4278255360 & (a[m] << 24 | a[m] >>> 8);
a[l >>> 5] |= 128 << l % 32,
a[14 + (l + 64 >>> 9 << 4)] = l;
var f = r._ff
, h = r._gg
, v = r._hh
, g = r._ii;
for (m = 0; m < a.length; m += 16) {
var y = c
, _ = d
, b = p
, $ = u;
d = g(d = g(d = g(d = g(d = v(d = v(d = v(d = v(d = h(d = h(d = h(d = h(d = f(d = f(d = f(d = f(d, p = f(p, u = f(u, c = f(c, d, p, u, a[m + 0], 7, -680876936), d, p, a[m + 1], 12, -389564586), c, d, a[m + 2], 17, 606105819), u, c, a[m + 3], 22, -1044525330), p = f(p, u = f(u, c = f(c, d, p, u, a[m + 4], 7, -176418897), d, p, a[m + 5], 12, 1200080426), c, d, a[m + 6], 17, -1473231341), u, c, a[m + 7], 22, -45705983), p = f(p, u = f(u, c = f(c, d, p, u, a[m + 8], 7, 1770035416), d, p, a[m + 9], 12, -1958414417), c, d, a[m + 10], 17, -42063), u, c, a[m + 11], 22, -1990404162), p = f(p, u = f(u, c = f(c, d, p, u, a[m + 12], 7, 1804603682), d, p, a[m + 13], 12, -40341101), c, d, a[m + 14], 17, -1502002290), u, c, a[m + 15], 22, 1236535329), p = h(p, u = h(u, c = h(c, d, p, u, a[m + 1], 5, -165796510), d, p, a[m + 6], 9, -1069501632), c, d, a[m + 11], 14, 643717713), u, c, a[m + 0], 20, -373897302), p = h(p, u = h(u, c = h(c, d, p, u, a[m + 5], 5, -701558691), d, p, a[m + 10], 9, 38016083), c, d, a[m + 15], 14, -660478335), u, c, a[m + 4], 20, -405537848), p = h(p, u = h(u, c = h(c, d, p, u, a[m + 9], 5, 568446438), d, p, a[m + 14], 9, -1019803690), c, d, a[m + 3], 14, -187363961), u, c, a[m + 8], 20, 1163531501), p = h(p, u = h(u, c = h(c, d, p, u, a[m + 13], 5, -1444681467), d, p, a[m + 2], 9, -51403784), c, d, a[m + 7], 14, 1735328473), u, c, a[m + 12], 20, -1926607734), p = v(p, u = v(u, c = v(c, d, p, u, a[m + 5], 4, -378558), d, p, a[m + 8], 11, -2022574463), c, d, a[m + 11], 16, 1839030562), u, c, a[m + 14], 23, -35309556), p = v(p, u = v(u, c = v(c, d, p, u, a[m + 1], 4, -1530992060), d, p, a[m + 4], 11, 1272893353), c, d, a[m + 7], 16, -155497632), u, c, a[m + 10], 23, -1094730640), p = v(p, u = v(u, c = v(c, d, p, u, a[m + 13], 4, 681279174), d, p, a[m + 0], 11, -358537222), c, d, a[m + 3], 16, -722521979), u, c, a[m + 6], 23, 76029189), p = v(p, u = v(u, c = v(c, d, p, u, a[m + 9], 4, -640364487), d, p, a[m + 12], 11, -421815835), c, d, a[m + 15], 16, 
530742520), u, c, a[m + 2], 23, -995338651), p = g(p, u = g(u, c = g(c, d, p, u, a[m + 0], 6, -198630844), d, p, a[m + 7], 10, 1126891415), c, d, a[m + 14], 15, -1416354905), u, c, a[m + 5], 21, -57434055), p = g(p, u = g(u, c = g(c, d, p, u, a[m + 12], 6, 1700485571), d, p, a[m + 3], 10, -1894986606), c, d, a[m + 10], 15, -1051523), u, c, a[m + 1], 21, -2054922799), p = g(p, u = g(u, c = g(c, d, p, u, a[m + 8], 6, 1873313359), d, p, a[m + 15], 10, -30611744), c, d, a[m + 6], 15, -1560198380), u, c, a[m + 13], 21, 1309151649), p = g(p, u = g(u, c = g(c, d, p, u, a[m + 4], 6, -145523070), d, p, a[m + 11], 10, -1120210379), c, d, a[m + 2], 15, 718787259), u, c, a[m + 9], 21, -343485551),
c = c + y >>> 0,
d = d + _ >>> 0,
p = p + b >>> 0,
u = u + $ >>> 0
}
return n.endian([c, d, p, u])
}
)._ff = function(e, t, a, n, i, o, s) {
var r = e + (t & a | ~t & n) + (i >>> 0) + s;
return (r << o | r >>> 32 - o) + t
}
,
r._gg = function(e, t, a, n, i, o, s) {
var r = e + (t & n | a & ~n) + (i >>> 0) + s;
return (r << o | r >>> 32 - o) + t
}
,
r._hh = function(e, t, a, n, i, o, s) {
var r = e + (t ^ a ^ n) + (i >>> 0) + s;
return (r << o | r >>> 32 - o) + t
}
,
r._ii = function(e, t, a, n, i, o, s) {
var r = e + (a ^ (t | ~n)) + (i >>> 0) + s;
return (r << o | r >>> 32 - o) + t
}
,
r._blocksize = 16,
r._digestsize = 16
var a = n.wordsToBytes(r(e, t));
return t && t.asBytes ? a : t && t.asString ? s.bytesToString(a) : n.bytesToHex(a)
}
// Local smoke test: hash the current millisecond timestamp and print the token.
// Declared with `var` so the assignment does not create an implicit global
// (which throws in strict mode / ES modules).
var e = String(new Date().getTime());
console.log(token(e));
效果实现
在 PyCharm 中直接运行 JS 代码需要 Node.js 环境支持;若尚未安装,可百度搜索“PyCharm 安装 Node.js”按教程完成安装与配置。
五、思路汇总
1.获取token值
2.构建请求链接
3.提取评价数据
源码实现
import time
import requests
import execjs
# Millisecond timestamp; it is hashed into the token and also sent in the
# "wangba" cookie below.
e = int(time.time() * 1000)

# Load the local JS file and compile it so that its token() can be called.
with open('tongchenglvxing.js', 'r', encoding='utf-8') as f:
    ctx = execjs.compile(f.read())
token = ctx.call('token', str(e))  # call the JS function to generate the token

# Target hotel and page. The hotel id is used for both the API URL and the
# Referer header, so changing it here keeps the two in sync.
hotel_id = 52003119
page = 2

url = "https://www.ly.com/hotel/api/tmapi/comment/list/?hotelid={}&page={}&pageSize=10&commentType=0&roomTypeId=&tripPurposeId=&RankType=1&mainTagId=&subTagId=&antitoken={}".format(
    hotel_id, page, token)

# Only the first cookie fragment is formatted: the second one contains literal
# braces (trace_extend JSON) that str.format() must not touch.
cookie = 'Hm_lvt_64941895c0a12a3bdeb5b07863a52466=1602295824; Hm_lpvt_64941895c0a12a3bdeb5b07863a52466=1602295824; 17uCNRefId=RefId=6928722&SEFrom=baidu&SEKeyWords=; CNSEInfo=RefId=6928722&tcbdkeyid=&SEFrom=baidu&SEKeyWords=&RefUrl=https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DuFsbm_4VERms_CCVvyn5vR74d_j8_i5hmKmmvPhj_5S%26wd%3D%26eqid%3D83e1960b000aba54000000065f811809; TicketSEInfo=RefId=6928722&SEFrom=baidu&SEKeyWords=; __tctmu=144323752.0.0; __tctmz=144323752.1602295823130.1.1.utmccn=(organic)|utmcmd=organic|utmEsl=gb2312|utmcsr=baidu|utmctr=; longKey=1602295823689244; __tctrack=0; qdid=-9999; Hm_lvt_c6a93e2a75a5b1ef9fb5d4553a2226e5=1602295833; Hm_lvt_f97c1b2277f4163d4974e7b5c8aa1e96=1602295834; wangba={}'.format(
    e) + '; firsttime=1602295835354; sug_act_info=; __tctmc=144323752.9037491; trace_token=; __tctmd=144323752.737325; __tccgd=144323752.0; route=95e291ab3c2e10cfb4323494ebf2a6bd; Hm_lpvt_c6a93e2a75a5b1ef9fb5d4553a2226e5=1602310138; Hm_lpvt_f97c1b2277f4163d4974e7b5c8aa1e96=1602310139; User-Ref-SessionId=fc5c-14fa-7bf9-14e9-015a-1d1e; trace_extend={"deviceid":"1602295823689244","appid":"1","userid":"1602295823689244","orderfromid":"57000","sessionid":"fc5c-14fa-7bf9-14e9-015a-1d1e","pvid":"4cea3129"}; __tctma=144323752.1602295823689244.1602295823130.1602305312867.1602310135480.5; __tctmb=144323752.881068342023288.1602310135480.1602310135480.1; lasttime=1602310899814'

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36",
    "Referer": "https://www.ly.com/HotelInfo-{}.html?spm0=10002.2001.1.0.1.4.31".format(hotel_id),
    "Cookie": cookie,
}

# TLS certificate verification is left enabled (the default); a timeout keeps
# the script from hanging forever, and HTTP errors are raised instead of being
# parsed as if they were a valid payload.
http_response = requests.get(url, headers=headers, timeout=10)
http_response.raise_for_status()
response = http_response.json()  # parse the JSON body

# Walk response -> body -> dpList defensively: a missing or null level yields
# an empty list instead of an AttributeError/TypeError.
res = ((response.get('response') or {}).get('body') or {}).get('dpList') or []
for i in res:
    print(i.get('dpContent'))
效果实现
参考文章
作者:不吃夹生饭
https://zhuanlan.zhihu.com/p/54627024