2024年最新【app爬虫】某宝详情页《问大家》数据采集分析(2)

i 是上面固定参数中的 t,当前的时间戳

g是上面固定参数中的 appKey

c.data 是上面固定参数中的data


所以下一步要知道d.token是如何生成的


#### token的生成规则


还是使用在js资源文件中全局搜索token的方式,发现了相关的js代码



/**
 * Populate `this.options.token` and return `o.resolve()`.
 *
 * When `options.CDR` is truthy and `j(y)` yields a value, the token is the
 * text of `j(y)` before the first ";". Otherwise an already-set
 * `options.token` is kept, falling back to `j(z)`. In both cases a non-empty
 * token is then cut down to the text before its first "_".
 *
 * NOTE(review): `j`, `y`, `z` and `o` are defined outside this excerpt; going
 * by the method name `j` looks like a cookie reader and `y`/`z` like cookie
 * names, and `o.resolve()` presumably yields a resolved promise/deferred --
 * confirm against the full library source.
 */
n.prototype.__getTokenFromCookie = function() {
    var opts = this.options;
    if (opts.CDR && j(y)) {
        // j(y) is deliberately read a second time, exactly as the original does.
        opts.token = j(y).split(";")[0];
    } else {
        opts.token = opts.token || j(z);
    }
    if (opts.token) {
        opts.token = opts.token.split("_")[0];
    }
    return o.resolve();
};

通过对这段js代码分析,发现token原来是从cookie(`_m_h5_tk`)中提取的,取下划线 `_` 之前的部分作为token:



_m_h5_tk=1632671ce1a89f17ac8d06525e4b5226_1555062955474


至此,全部的参数都已经知道生成规则了,接下来可以编写python代码请求数据了。


### sign 生成


关于sign的生成公式:


md5Hex(token + "&" + t + "&" + appKey + "&" + data)  
 如:md5Hex('645d1f414d4914297dfaab40f3f76016 &1234&4272&{"itemNumId":"1500011132496"}')  
 sign=d2b2f818a03496b296b899a230c03abd


### 计算js



function get_sign_demo(the_params_for_signing) {function t(e, t) {return e << t | e >>> 32 - t
}function n(e, t) {var n, o, i, r, s;return i = 2147483648 & e, r = 2147483648 & t, n = 1073741824 & e, o = 1073741824 & t, s = (1073741823 & e) + (1073741823 & t), n & o ? 2147483648 ^ s ^ i ^ r : n | o ? 1073741824 & s ? 3221225472 ^ s ^ i ^ r : 1073741824 ^ s ^ i ^ r : s ^ i ^ r
}function o(e, t, n) {return e & t | ~e & n
}function i(e, t, n) {return e & n | t & ~n
}function r(e, t, n) {return e ^ t ^ n
}function s(e, t, n) {return t ^ (e | ~n)
}function a(e, i, r, s, a, p, u) {return e = n(e, n(n(o(i, r, s), a), u)), n(t(e, p), i)
}function p(e, o, r, s, a, p, u) {return e = n(e, n(n(i(o, r, s), a), u)), n(t(e, p), o)
}function u(e, o, i, s, a, p, u) {return e = n(e, n(n(r(o, i, s), a), u)), n(t(e, p), o)
}function c(e, o, i, r, a, p, u) {return e = n(e, n(n(s(o, i, r), a), u)), n(t(e, p), o)
}function d(e) {for (var t, n = e.length, o = n + 8, i = (o - o % 64) / 64, r = 16 * (i + 1), s = new Array(r - 1), a = 0, p = 0; n > p;) {
t = (p - p % 4) / 4, a = p % 4 * 8, s[t] = s[t] | e.charCodeAt§ << a, p++
}return t = (p - p % 4) / 4, a = p % 4 * 8, s[t] = s[t] | 128 << a, s[r - 2] = n << 3, s[r - 1] = n >>> 29, s
}function l(e) {var t, n, o = “”,
i = “”;for (n = 0; 3 >= n; n++) {
t = e >>> 8 * n & 255, i = “0” + t.toString(16), o += i.substr(i.length - 2, 2)
}return o
}function f(e) {
e = e.replace(/\r\n/g, “\n”);for (var t = “”, n = 0; n < e.length; n++) {var o = e.charCodeAt(n);128 > o ? t += String.fromCharCode(o) : o > 127 && 2048 > o ? (t += String.fromCharCode(o >> 6 | 192), t += String.fromCharCode(63 & o | 128)) : (t += String.fromCharCode(o >> 12 | 224), t += String.fromCharCode(o >> 6 & 63 | 128), t += String.fromCharCode(63 & o | 128))
}return t
}var m, h, g, v, _, y, R, w, E, S = [],
O = 7,
b = 12,
T = 17,
q = 22,
A = 5,
x = 9,
C = 14,
N = 20,
J = 4,
k = 11,
L = 16,
D = 23,
I = 6,
P = 10,
F = 15,
j = 21;for (the_params_for_signing = f(the_params_for_signing), S = d(the_params_for_signing), y = 1732584193, R = 4023233417, w = 2562383102, E = 271733878, m = 0; m < S.length; m += 16) {
h = y, g = R, v = w, _ = E, y = a(y, R, w, E, S[m + 0], O, 3614090360), E = a(E, y, R, w, S[m + 1], b, 3905402710), w = a(w, E, y, R, S[m + 2], T, 606105819), R = a(R, w, E, y, S[m + 3], q, 3250441966), y = a(y, R, w, E, S[m + 4], O, 4118548399), E = a(E, y, R, w, S[m + 5], b, 1200080426), w = a(w, E, y, R, S[m + 6], T, 2821735955), R = a(R, w, E, y, S[m + 7], q, 4249261313), y = a(y, R, w, E, S[m + 8], O, 1770035416), E = a(E, y, R, w, S[m + 9], b, 2336552879), w = a(w, E, y, R, S[m + 10], T, 4294925233), R = a(R, w, E, y, S[m + 11], q, 2304563134), y = a(y, R, w, E, S[m + 12], O, 1804603682), E = a(E, y, R, w, S[m + 13], b, 4254626195), w = a(w, E, y, R, S[m + 14], T, 2792965006), R = a(R, w, E, y, S[m + 15], q, 1236535329), y = p(y, R, w, E, S[m + 1], A, 4129170786), E = p(E, y, R, w, S[m + 6], x, 3225465664), w = p(w, E, y, R, S[m + 11], C, 643717713), R = p(R, w, E, y, S[m + 0], N, 3921069994), y = p(y, R, w, E, S[m + 5], A, 3593408605), E = p(E, y, R, w, S[m + 10], x, 38016083), w = p(w, E, y, R, S[m + 15], C, 3634488961), R = p(R, w, E, y, S[m + 4], N, 3889429448), y = p(y, R, w, E, S[m + 9], A, 568446438), E = p(E, y, R, w, S[m + 14], x, 3275163606), w = p(w, E, y, R, S[m + 3], C, 4107603335), R = p(R, w, E, y, S[m + 8], N, 1163531501), y = p(y, R, w, E, S[m + 13], A, 2850285829), E = p(E, y, R, w, S[m + 2], x, 4243563512), w = p(w, E, y, R, S[m + 7], C, 1735328473), R = p(R, w, E, y, S[m + 12], N, 2368359562), y = u(y, R, w, E, S[m + 5], J, 4294588738), E = u(E, y, R, w, S[m + 8], k, 2272392833), w = u(w, E, y, R, S[m + 11], L, 1839030562), R = u(R, w, E, y, S[m + 14], D, 4259657740), y = u(y, R, w, E, S[m + 1], J, 2763975236), E = u(E, y, R, w, S[m + 4], k, 1272893353), w = u(w, E, y, R, S[m + 7], L, 4139469664), R = u(R, w, E, y, S[m + 10], D, 3200236656), y = u(y, R, w, E, S[m + 13], J, 681279174), E = u(E, y, R, w, S[m + 0], k, 3936430074), w = u(w, E, y, R, S[m + 3], L, 3572445317), R = u(R, w, E, y, S[m + 6], D, 76029189), y = u(y, R, w, E, S[m + 9], 
J, 3654602809), E = u(E, y, R, w, S[m + 12], k, 3873151461), w = u(w, E, y, R, S[m + 15], L, 530742520), R = u(R, w, E, y, S[m + 2], D, 3299628645), y = c(y, R, w, E, S[m + 0], I, 4096336452), E = c(E, y, R, w, S[m + 7], P, 1126891415), w = c(w, E, y, R, S[m + 14], F, 2878612391), R = c(R, w, E, y, S[m + 5], j, 4237533241), y = c(y, R, w, E, S[m + 12], I, 1700485571), E = c(E, y, R, w, S[m + 3], P, 2399980690), w = c(w, E, y, R, S[m + 10], F, 4293915773), R = c(R, w, E, y, S[m + 1], j, 2240044497), y = c(y, R, w, E, S[m + 8], I, 1873313359), E = c(E, y, R, w, S[m + 15], P, 4264355552), w = c(w, E, y, R, S[m + 6], F, 2734768916), R = c(R, w, E, y, S[m + 13], j, 1309151649), y = c(y, R, w, E, S[m + 4], I, 4149444226), E = c(E, y, R, w, S[m + 11], P, 3174756917), w = c(w, E, y, R, S[m + 2], F, 718787259), R = c(R, w, E, y, S[m + 9], j, 3951481745), y = n(y, h), R = n(R, g), w = n(w, v), E = n(E, _)
}var H = l(y) + l® + l(w) + l(E);return H.toLowerCase()
}



import threading
import js2py
import time
# Lock taken by get_sign around its js2py work.
mutex = threading.Lock()
# Read the JavaScript source once at import time; get_sign executes it and
# then calls the get_sign_demo function it is expected to define.
with open(r".\t.js", encoding='utf-8') as f:
    cx = f.read()

def get_sign(_m_h6_tk_first, appkey, data):
    """Build the request signature for the current moment.

    The string ``token&t&appkey&data`` is passed to the JavaScript function
    ``get_sign_demo`` (loaded from the module-level source ``cx``), where
    ``t`` is the current time in milliseconds.

    Args:
        _m_h6_tk_first: Token string; per the article, the part of the
            ``_m_h5_tk`` cookie before the underscore.
        appkey: The appKey request parameter, as a string. An earlier
            execjs-based variant of this function hard-coded "12574478".
        data: The ``data`` request parameter, as a string.

    Returns:
        A ``(sign, t)`` tuple: the value returned by ``get_sign_demo`` and the
        millisecond timestamp used to build it. The same ``t`` must be sent
        with the request, since it is part of the signed string.
    """
    t = int(time.time() * 1000)
    sign_str = _m_h6_tk_first + "&" + str(t) + "&" + appkey + "&" + data
    # `with` guarantees the lock is released even if js2py raises; a bare
    # acquire()/release() pair would leave it held and block later callers.
    with mutex:
        context = js2py.EvalJs()
        context.execute(cx)
        sign = context.get_sign_demo(sign_str)
    return sign, t


### 分析


作者采集了某款眼霜产品作为分析对象。


#### 数据展示


![在这里插入图片描述](https://img-blog.csdnimg.cn/b319350a6da04ed09d9dd6728bcd1ccc.png?x-oss-process=image/watermark,type_d3F5LXplbmhlaQ,shadow_50,text_Q1NETiBA54ixcHl0aG9u55qE546L5LiJ6YeR,size_20,color_FFFFFF,t_70,g_se,x_16)


#### 词云分析


![在这里插入图片描述](https://img-blog.csdnimg.cn/90f6828e9a614147a650d9eaa581d669.png?x-oss-process=image/watermark,type_d3F5LXplbmhlaQ,shadow_50,text_Q1NETiBA54ixcHl0aG9u55qE546L5LiJ6YeR,size_20,color_FFFFFF,t_70,g_se,x_16)  
 通过词云可以看出消费者对于眼霜这款产品关注的点有以下几个方面:  
 1、去黑眼圈的效果  
 2、使用的年龄段  
 3、淡化细纹的效果  


![img](https://img-blog.csdnimg.cn/img_convert/2dc68a0ecc41aa2a840c464b3eaa14f0.png)
![img](https://img-blog.csdnimg.cn/img_convert/b653d69beeb75efc23c5031997f806f7.png)

**网上学习资料一大堆,但如果学到的知识不成体系,遇到问题时只是浅尝辄止,不再深入研究,那么很难做到真正的技术提升。**

**[需要这份系统化资料的朋友,可以戳这里获取](https://bbs.csdn.net/topics/618545628)**


**一个人可以走得很快,但一群人才能走得更远!不论你是正从事IT行业的老鸟或是对IT行业感兴趣的新人,都欢迎加入我们的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!**

**[需要这份系统化资料的朋友,可以戳这里获取](https://bbs.csdn.net/topics/618545628)**


**一个人可以走得很快,但一群人才能走得更远!不论你是正从事IT行业的老鸟或是对IT行业感兴趣的新人,都欢迎加入我们的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!**

  • 24
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值