最全【app爬虫】某宝详情页《问大家》数据采集分析,2024年最新统统都会

img
img

网上学习资料一大堆,但如果学到的知识不成体系,遇到问题时只是浅尝辄止,不再深入研究,那么很难做到真正的技术提升。

需要这份系统化资料的朋友,可以戳这里获取

一个人可以走得很快,但一群人才能走得更远!不论你是正从事IT行业的老鸟,还是对IT行业感兴趣的新人,都欢迎加入我们的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!

在这里插入图片描述

参数分析

通过代码比对发现 t、sign、data 是动态的参数
尝试不带sign参数请求数据发现请求失败了,很明显sign是一个加密参数需要动态计算,所以难点是如何找到sign的生成规则;

sign生成规则

一般来说,请求参数都是由js生成的。按照这个思路,在js资源文件中全局搜索sign并配合打断点的方式进行定位,果不其然,找到了相应的js代码。到目前为止,一切还是挺顺利的。
在这里插入图片描述
从截图中可以发现
sign是由h()函数生成的,h函数的参数是以下几部分组成

d.token + "&" + i + "&" + g + "&" + c.data
# i 是上面固定参数中的 t,当前的时间戳
# g是上面固定参数中的 appKey
# c.data 是上面固定参数中的data

所以下一步要知道d.token是如何生成的

token的生成规则

还是使用在js资源文件中全局搜索token的方式,发现了相关的js代码

    /**
     * Populate `this.options.token` and return `o.resolve()`.
     *
     * When `options.CDR` is truthy and `j(y)` yields a truthy value, the token
     * is the part of `j(y)` before the first ";". Otherwise an already-set
     * `options.token` is kept, falling back to `j(z)`. In both cases a
     * non-empty token is then cut down to the part before its first "_".
     *
     * NOTE(review): `j`, `y`, `z` and `o` are defined outside this snippet;
     * judging by the method name, `j` presumably reads a cookie by name and
     * `o.resolve()` presumably returns a resolved promise/deferred - confirm
     * against the full script.
     */
    n.prototype.__getTokenFromCookie = function() {
        var opts = this.options;

        if (opts.CDR && j(y)) {
            // j(y) is deliberately evaluated a second time, as in the original.
            opts.token = j(y).split(";")[0];
        } else {
            opts.token = opts.token || j(z);
        }

        if (opts.token) {
            // Keep only the segment in front of the first underscore.
            opts.token = opts.token.split("_")[0];
        }

        return o.resolve();
    }

通过对这段js代码分析,发现token原来是从cookie中提取的:

_m_h5_tk=1632671ce1a89f17ac8d06525e4b5226_1555062955474

至此,全部的参数都已经知道生成规则了,接下来可以编写python代码请求数据了。

sign 生成

关于sign的生成公式:

md5Hex(token&t&appKey&data)
如:md5Hex('645d1f414d4914297dfaab40f3f76016 &1234&4272&{"itemNumId":"1500011132496"}')
sign=d2b2f818a03496b296b899a230c03abd

计算js

function get\_sign\_demo(the_params_for_signing) {function t(e, t) {return e << t | e >>> 32 - t
    }function n(e, t) {var n, o, i, r, s;return i = 2147483648 & e, r = 2147483648 & t, n = 1073741824 & e, o = 1073741824 & t, s = (1073741823 & e) + (1073741823 & t), n & o ? 2147483648 ^ s ^ i ^ r : n | o ? 1073741824 & s ? 3221225472 ^ s ^ i ^ r : 1073741824 ^ s ^ i ^ r : s ^ i ^ r
    }function o(e, t, n) {return e & t | ~e & n
    }function i(e, t, n) {return e & n | t & ~n
    }function r(e, t, n) {return e ^ t ^ n
    }function s(e, t, n) {return t ^ (e | ~n)
    }function a(e, i, r, s, a, p, u) {return e = n(e, n(n(o(i, r, s), a), u)), n(t(e, p), i)
    }function p(e, o, r, s, a, p, u) {return e = n(e, n(n(i(o, r, s), a), u)), n(t(e, p), o)
    }function u(e, o, i, s, a, p, u) {return e = n(e, n(n(r(o, i, s), a), u)), n(t(e, p), o)
    }function c(e, o, i, r, a, p, u) {return e = n(e, n(n(s(o, i, r), a), u)), n(t(e, p), o)
    }function d(e) {for (var t, n = e.length, o = n + 8, i = (o - o % 64) / 64, r = 16 \* (i + 1), s = new Array(r - 1), a = 0, p = 0; n > p;) {
            t = (p - p % 4) / 4, a = p % 4 \* 8, s[t] = s[t] | e.charCodeAt(p) << a, p++
        }return t = (p - p % 4) / 4, a = p % 4 \* 8, s[t] = s[t] | 128 << a, s[r - 2] = n << 3, s[r - 1] = n >>> 29, s
    }function l(e) {var t, n, o = "",
            i = "";for (n = 0; 3 >= n; n++) {
            t = e >>> 8 \* n & 255, i = "0" + t.toString(16), o += i.substr(i.length - 2, 2)
        }return o
    }function f(e) {
        e = e.replace(/\r\n/g, "\n");for (var t = "", n = 0; n < e.length; n++) {var o = e.charCodeAt(n);128 > o ? t += String.fromCharCode(o) : o > 127 && 2048 > o ? (t += String.fromCharCode(o >> 6 | 192), t += String.fromCharCode(63 & o | 128)) : (t += String.fromCharCode(o >> 12 | 224), t += String.fromCharCode(o >> 6 & 63 | 128), t += String.fromCharCode(63 & o | 128))
        }return t
    }var m, h, g, v, _, y, R, w, E, S = [],
        O = 7,
        b = 12,
        T = 17,
        q = 22,
        A = 5,
        x = 9,
        C = 14,
        N = 20,
        J = 4,
        k = 11,
        L = 16,
        D = 23,
        I = 6,
        P = 10,
        F = 15,
        j = 21;for (the_params_for_signing = f(the_params_for_signing), S = d(the_params_for_signing), y = 1732584193, R = 4023233417, w = 2562383102, E = 271733878, m = 0; m < S.length; m += 16) {
        h = y, g = R, v = w, _ = E, y = a(y, R, w, E, S[m + 0], O, 3614090360), E = a(E, y, R, w, S[m + 1], b, 3905402710), w = a(w, E, y, R, S[m + 2], T, 606105819), R = a(R, w, E, y, S[m + 3], q, 3250441966), y = a(y, R, w, E, S[m + 4], O, 4118548399), E = a(E, y, R, w, S[m + 5], b, 1200080426), w = a(w, E, y, R, S[m + 6], T, 2821735955), R = a(R, w, E, y, S[m + 7], q, 4249261313), y = a(y, R, w, E, S[m + 8], O, 1770035416), E = a(E, y, R, w, S[m + 9], b, 2336552879), w = a(w, E, y, R, S[m + 10], T, 4294925233), R = a(R, w, E, y, S[m + 11], q, 2304563134), y = a(y, R, w, E, S[m + 12], O, 1804603682), E = a(E, y, R, w, S[m + 13], b, 4254626195), w = a(w, E, y, R, S[m + 14], T, 2792965006), R = a(R, w, E, y, S[m + 15], q, 1236535329), y = p(y, R, w, E, S[m + 1], A, 4129170786), E = p(E, y, R, w, S[m + 6], x, 3225465664), w = p(w, E, y, R, S[m + 11], C, 643717713), R = p(R, w, E, y, S[m + 0], N, 3921069994), y = p(y, R, w, E, S[m + 5], A, 3593408605), E = p(E, y, R, w, S[m + 10], x, 38016083), w = p(w, E, y, R, S[m + 15], C, 3634488961), R = p(R, w, E, y, S[m + 4], N, 3889429448), y = p(y, R, w, E, S[m + 9], A, 568446438), E = p(E, y, R, w, S[m + 14], x, 3275163606), w = p(w, E, y, R, S[m + 3], C, 4107603335), R = p(R, w, E, y, S[m + 8], N, 1163531501), y = p(y, R, w, E, S[m + 13], A, 2850285829), E = p(E, y, R, w, S[m + 2], x, 4243563512), w = p(w, E, y, R, S[m + 7], C, 1735328473), R = p(R, w, E, y, S[m + 12], N, 2368359562), y = u(y, R, w, E, S[m + 5], J, 4294588738), E = u(E, y, R, w, S[m + 8], k, 2272392833), w = u(w, E, y, R, S[m + 11], L, 1839030562), R = u(R, w, E, y, S[m + 14], D, 4259657740), y = u(y, R, w, E, S[m + 1], J, 2763975236), E = u(E, y, R, w, S[m + 4], k, 1272893353), w = u(w, E, y, R, S[m + 7], L, 4139469664), R = u(R, w, E, y, S[m + 10], D, 3200236656), y = u(y, R, w, E, S[m + 13], J, 681279174), E = u(E, y, R, w, S[m + 0], k, 3936430074), w = u(w, E, y, R, S[m + 3], L, 3572445317), R = u(R, w, E, y, S[m + 6], D, 76029189), y = u(y, R, w, E, 
S[m + 9], J, 3654602809), E = u(E, y, R, w, S[m + 12], k, 3873151461), w = u(w, E, y, R, S[m + 15], L, 530742520), R = u(R, w, E, y, S[m + 2], D, 3299628645), y = c(y, R, w, E, S[m + 0], I, 4096336452), E = c(E, y, R, w, S[m + 7], P, 1126891415), w = c(w, E, y, R, S[m + 14], F, 2878612391), R = c(R, w, E, y, S[m + 5], j, 4237533241), y = c(y, R, w, E, S[m + 12], I, 1700485571), E = c(E, y, R, w, S[m + 3], P, 2399980690), w = c(w, E, y, R, S[m + 10], F, 4293915773), R = c(R, w, E, y, S[m + 1], j, 2240044497), y = c(y, R, w, E, S[m + 8], I, 1873313359), E = c(E, y, R, w, S[m + 15], P, 4264355552), w = c(w, E, y, R, S[m + 6], F, 2734768916), R = c(R, w, E, y, S[m + 13], j, 1309151649), y = c(y, R, w, E, S[m + 4], I, 4149444226), E = c(E, y, R, w, S[m + 11], P, 3174756917), w = c(w, E, y, R, S[m + 2], F, 718787259), R = c(R, w, E, y, S[m + 9], j, 3951481745), y = n(y, h), R = n(R, g), w = n(w, v), E = n(E, _)
    }var H = l(y) + l(R) + l(w) + l(E);return H.toLowerCase()
}

import threading
import js2py
import time
# Module-level lock; get_sign holds it while it evaluates the JavaScript.
mutex = threading.Lock()
# Read the JavaScript source once at import time. The raw string keeps the
# backslash literal, i.e. a Windows-style path relative to the current
# working directory.
with open(r".\t.js", encoding='utf-8') as f:
    cx = f.read()

def get_sign(_m_h6_tk_first, appkey, data):
    """Build the request signature for the current millisecond timestamp.

    The string ``token & t & appkey & data`` is assembled and passed to the
    JavaScript function ``get_sign_demo`` from the script text held in the
    module-level ``cx``, evaluated through js2py.

    Args:
        _m_h6_tk_first: Token string placed first in the signed text
            (presumably the part of the ``_m_h5_tk`` cookie before the
            underscore - confirm against the caller).
        appkey: Application key as a string.
        data: Request payload as a string.

    Returns:
        A ``(sign, t)`` tuple: the value returned by ``get_sign_demo`` and the
        millisecond timestamp that was signed, so the caller can send the
        same ``t`` with the request.
    """
    t = int(time.time() * 1000)
    sign_str = _m_h6_tk_first + "&" + str(t) + "&" + appkey + "&" + data
    # Hold the lock via a context manager so it is released even when js2py
    # raises; a manual acquire()/release() pair would leave it locked forever.
    with mutex:
        context = js2py.EvalJs()
        context.execute(cx)
        sign = context.get_sign_demo(sign_str)
    return sign, t



![img](https://img-blog.csdnimg.cn/img_convert/91dcae8ee85b43406b66d737d95480e7.png)
![img](https://img-blog.csdnimg.cn/img_convert/ccb45ef628e70adbfe03f9f49d44260b.png)

**网上学习资料一大堆,但如果学到的知识不成体系,遇到问题时只是浅尝辄止,不再深入研究,那么很难做到真正的技术提升。**

**[需要这份系统化资料的朋友,可以戳这里获取](https://bbs.csdn.net/topics/618545628)**


**一个人可以走得很快,但一群人才能走得更远!不论你是正从事IT行业的老鸟,还是对IT行业感兴趣的新人,都欢迎加入我们的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!**

**[需要这份系统化资料的朋友,可以戳这里获取](https://bbs.csdn.net/topics/618545628)**


**一个人可以走得很快,但一群人才能走得更远!不论你是正从事IT行业的老鸟,还是对IT行业感兴趣的新人,都欢迎加入我们的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!**

  • 3
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值