爬虫攻守道 - 爬取某人点集数据

目标

[某人点集] 记录了各大主流电商的直播数据。本文用 Python 实现登录和数据爬取。

步骤1:登录

访问首页时会加载一些 js 文件,其中的 app.js 包含一段加密逻辑(实际上是 MD5 摘要),用来对 pwd 进行加密;随后把手机号、加密后的密码、时间戳、tenant 以及 1 个固定的字符串依次拼接,再用同一个函数加密得到 sig。用这些参数发起 post 请求,即可得到 token

步骤2:获取数据

用数据模块的参数(1 个页面包含多个不同属性的数据块),按 param=…&timestamp=…&tenant=…&salt=… 的格式拼接上时间戳、tenant 和 salt,做 sha256 得到 sign
然后综合数据模块参数、sign、tenant、时间戳、token 再次发起请求,即可得到数据(请求里的时间戳必须与计算 sign 时使用的时间戳一致)

代码

Python

#!/usr/bin/env python3
# _*_ coding:utf-8 _*_
"""
@File               : hrdj.py
@Project            : S023_HRDJ
@CreateTime         : 2023/3/7 14:16
@Author             : biaobro
@Software           : PyCharm
@Last Modify Time   : 2023/3/7 14:16 
@Version            : 1.0
@Description        : None
"""
import hashlib
import json
import time
import execjs
import requests


def js_encrypt(input_str):
    """Run the JavaScript function ``i`` from ``app.js`` on ``input_str``.

    ``app.js`` is read from the current working directory and compiled with
    ``execjs`` on every call.

    :param input_str: text handed to the JavaScript function ``i``.
    :return: the value returned by ``i`` (also printed to stdout).
    """
    with open('app.js', 'r', encoding='utf-8') as f:
        app_js = f.read()
    ctx = execjs.compile(app_js)
    # Pass the value as a real argument instead of splicing it into JS source.
    # The previous 'i("%s")' template produced broken (or attacker-controlled)
    # JavaScript whenever the input contained a double quote, a backslash or a
    # line break.
    res = ctx.call('i', input_str)
    print(res)
    return res


# Step 1: log in with phone number + hashed password and obtain a token.
headers = {
    "Content-Type": "application/json;charset=UTF-8",
}
phoneNum = '12345678901'
password = "23456789"
encrypt_pwd = js_encrypt(password)

timestamp = int(time.time() * 1000)
tenant = 1
fix_str = "JzyqgcoojMiQNuQoTlbR5EBT8TsqzJ"
# Login signature: phone + hashed password + timestamp + tenant + fixed string,
# hashed with the same JavaScript function as the password.
encrypt_group = js_encrypt(phoneNum + encrypt_pwd + str(timestamp) + str(tenant) + fix_str)

payload_login = {
    'phoneNum': phoneNum,
    'pwd': encrypt_pwd,
    't': timestamp,
    'tenant': tenant,
    'sig': encrypt_group
}

url_phonePwdLogin = "https://user.hrdjyun.com/wechat/phonePwdLogin"
resp = requests.post(url_phonePwdLogin, data=json.dumps(payload_login), headers=headers).json()

# Fail with a readable message (including the server reply) when the login is
# rejected, instead of an opaque KeyError/TypeError on the lookup below.
try:
    token = resp['data']['token']
except (KeyError, TypeError):
    raise RuntimeError("login failed, no token in response: %r" % (resp,))
print(token)

# Step 2: request one data module, signed with sha256.
data_no_rank = {"no": "dy0026", "data": {}}
# Alternative request body for a ranked module; not sent by this script.
data_rank_2 = {"no": "dy1011", "data": {"rankType": "2"}}
# json.dumps inserts spaces after ',' and ':' by default, unlike JavaScript's
# JSON.stringify(); compact separators keep the signed string identical to
# what the JavaScript side would produce.
data_param = json.dumps(data_no_rank, separators=(',', ':'))
salt = "kbn%&)@<?FGkfs8sdf4Vg1*+;`kf5ndl$"

# Take the timestamp once and use the same value in the signed string and in
# the payload. Two separate time.time() calls can differ by a millisecond or
# more, in which case the signature no longer matches the timestamp that is
# actually sent.
data_timestamp = int(time.time() * 1000)
data = "param=" + data_param + "&timestamp=" + str(data_timestamp) + "&tenant=" + str(tenant) + "&salt=" + salt
print(data)
data_sha = hashlib.sha256(data.encode('utf-8')).hexdigest()
print(data_sha)

url_data = "https://ucp.hrdjyun.com:60359/api/dy"
payload = {
    # No manual quote escaping is needed: json.dumps(payload) below escapes the
    # quotes inside this string. (The former .replace('"', '\"') was a no-op,
    # because '\"' is just '"' in Python.)
    "param": data_param,
    "sign": data_sha,
    "tenant": str(tenant),
    "timestamp": data_timestamp,
    "token": token
}

resp = requests.post(url_data, data=json.dumps(payload), headers=headers).text
print(resp)

JavaScript

/*
@File               : app.js
@Project            : S023_HRDJ
@CreateTime         : 2023/3/7 14:39
@Author             : biaobro
@Software           : PyCharm
@Last Modify Time   : 2023/3/7 14:39 
@Version            : 1.0
@Description        : None
*/
// Hex-case switch read by c(): a truthy value selects the uppercase digit
// table "0123456789ABCDEF", a falsy value (the 0 set here) the lowercase one.
var a = 0

/**
 * Entry point of the digest pipeline: feeds the input through s(), hands the
 * result to o(), and formats what o() returns with c().
 *
 * @param {string} n - Text to digest.
 * @returns {string} The value produced by c().
 */
function i(n) {
    var encoded = s(n);
    var digest = o(encoded);
    return c(digest)
}

/**
 * Digests a string: packs it into words with l(), runs d() over those words
 * together with the input length in bits, and converts the result back to a
 * string with r().
 *
 * @param {string} n - Input string; its length is multiplied by 8 to get the bit count.
 * @returns {string} The value produced by r().
 */
function o(n) {
    var words = l(n);
    var bitLength = 8 * n.length;
    var state = d(words, bitLength);
    return r(state)
}

/**
 * Renders each character of a string as two hexadecimal digits, using the low
 * 8 bits of its char code (high nibble first).
 *
 * The digit table is chosen once from the outer variable `a`: uppercase when
 * it is truthy, lowercase otherwise.
 *
 * @param {string} n - String whose char codes are rendered.
 * @returns {string} Hex text, two digits per input character.
 */
function c(n) {
    var digits = a ? "0123456789ABCDEF" : "0123456789abcdef";
    var hex = "";
    for (var pos = 0; pos < n.length; pos += 1) {
        var code = n.charCodeAt(pos);
        var highNibble = (code >>> 4) & 15;
        var lowNibble = code & 15;
        hex += digits.charAt(highNibble) + digits.charAt(lowNibble);
    }
    return hex
}

/**
 * Encodes a UTF-16 string as UTF-8, returning a string in which every
 * character holds one byte value (0-255).
 *
 * A high surrogate followed by a low surrogate is combined into one code
 * point and emitted as four bytes; an unpaired surrogate is emitted as a
 * three-byte sequence like any other value up to 0xFFFF.
 *
 * @param {string} n - Text to encode.
 * @returns {string} Byte string with the UTF-8 encoding of `n`.
 */
function s(n) {
    var out = "";
    var idx = 0;
    while (idx < n.length) {
        var code = n.charCodeAt(idx);
        var next = idx + 1 < n.length ? n.charCodeAt(idx + 1) : 0;

        // Merge a surrogate pair into a single code point and skip its second half.
        if (code >= 0xD800 && code <= 0xDBFF && next >= 0xDC00 && next <= 0xDFFF) {
            code = 0x10000 + ((code & 0x3FF) << 10) + (next & 0x3FF);
            idx += 1;
        }

        if (code <= 0x7F) {
            out += String.fromCharCode(code);
        } else if (code <= 0x7FF) {
            out += String.fromCharCode(
                0xC0 | ((code >>> 6) & 0x1F),
                0x80 | (code & 0x3F)
            );
        } else if (code <= 0xFFFF) {
            out += String.fromCharCode(
                0xE0 | ((code >>> 12) & 0x0F),
                0x80 | ((code >>> 6) & 0x3F),
                0x80 | (code & 0x3F)
            );
        } else if (code <= 0x1FFFFF) {
            out += String.fromCharCode(
                0xF0 | ((code >>> 18) & 0x07),
                0x80 | ((code >>> 12) & 0x3F),
                0x80 | ((code >>> 6) & 0x3F),
                0x80 | (code & 0x3F)
            );
        }
        idx += 1;
    }
    return out
}

/**
 * Packs a byte string into an array of 32-bit words, four characters per
 * word, with the first character of each group in the lowest 8 bits.
 * Only the low 8 bits of each char code are used.
 *
 * @param {string} n - Byte string to pack.
 * @returns {number[]} Packed words; a trailing partial group still gets its own word.
 */
function l(n) {
    var words = [];
    var fullWords = n.length >> 2;
    for (var w = 0; w < fullWords; w++) {
        words.push(0);
    }
    for (var k = 0; k < n.length; k++) {
        var shift = (k % 4) * 8;
        words[k >> 2] |= (n.charCodeAt(k) & 255) << shift;
    }
    return words
}

/**
 * Unpacks an array of 32-bit words into a byte string: each word yields four
 * characters, lowest 8 bits first.
 *
 * @param {number[]} n - Words to unpack.
 * @returns {string} String with four characters (values 0-255) per word.
 */
function r(n) {
    var chars = [];
    for (var w = 0; w < n.length; w++) {
        for (var shift = 0; shift < 32; shift += 8) {
            chars.push(String.fromCharCode((n[w] >>> shift) & 255));
        }
    }
    return chars.join("")
}

/**
 * Core block transform over an array of 32-bit words. The initial state
 * values, per-step constants and shift amounts correspond to the MD5
 * compression function (RFC 1321).
 *
 * The input array is modified in place: a padding bit and the bit length are
 * written into it before processing.
 *
 * Note: the locals t, a, i, o, c, s, l, r and d declared here shadow the
 * module-level names of the same letters for the duration of this function.
 *
 * @param {number[]} n - Message words (mutated by the padding step).
 * @param {number} e - Message length in bits.
 * @returns {number[]} Array of the four resulting 32-bit state words.
 */
function d(n, e) {
    // Padding: set the 0x80 marker right after the last message bit, then
    // store the bit length at word 14 of the final 16-word block.
    n[e >> 5] |= 128 << e % 32,
        n[14 + (e + 64 >>> 9 << 4)] = e;
    // t, a, i, o hold the four state words; c walks the input 16 words at a time.
    for (var t = 1732584193, a = -271733879, i = -1732584194, o = 271733878, c = 0; c < n.length; c += 16) {
        // Remember the state at the start of this block; it is added back at the end.
        var s = t
            , l = a
            , r = i
            , d = o;
        // Round 1: 16 steps through h().
        t = h(t, a, i, o, n[c + 0], 7, -680876936),
            o = h(o, t, a, i, n[c + 1], 12, -389564586),
            i = h(i, o, t, a, n[c + 2], 17, 606105819),
            a = h(a, i, o, t, n[c + 3], 22, -1044525330),
            t = h(t, a, i, o, n[c + 4], 7, -176418897),
            o = h(o, t, a, i, n[c + 5], 12, 1200080426),
            i = h(i, o, t, a, n[c + 6], 17, -1473231341),
            a = h(a, i, o, t, n[c + 7], 22, -45705983),
            t = h(t, a, i, o, n[c + 8], 7, 1770035416),
            o = h(o, t, a, i, n[c + 9], 12, -1958414417),
            i = h(i, o, t, a, n[c + 10], 17, -42063),
            a = h(a, i, o, t, n[c + 11], 22, -1990404162),
            t = h(t, a, i, o, n[c + 12], 7, 1804603682),
            o = h(o, t, a, i, n[c + 13], 12, -40341101),
            i = h(i, o, t, a, n[c + 14], 17, -1502002290),
            a = h(a, i, o, t, n[c + 15], 22, 1236535329),
            // Round 2: 16 steps through m().
            t = m(t, a, i, o, n[c + 1], 5, -165796510),
            o = m(o, t, a, i, n[c + 6], 9, -1069501632),
            i = m(i, o, t, a, n[c + 11], 14, 643717713),
            a = m(a, i, o, t, n[c + 0], 20, -373897302),
            t = m(t, a, i, o, n[c + 5], 5, -701558691),
            o = m(o, t, a, i, n[c + 10], 9, 38016083),
            i = m(i, o, t, a, n[c + 15], 14, -660478335),
            a = m(a, i, o, t, n[c + 4], 20, -405537848),
            t = m(t, a, i, o, n[c + 9], 5, 568446438),
            o = m(o, t, a, i, n[c + 14], 9, -1019803690),
            i = m(i, o, t, a, n[c + 3], 14, -187363961),
            a = m(a, i, o, t, n[c + 8], 20, 1163531501),
            t = m(t, a, i, o, n[c + 13], 5, -1444681467),
            o = m(o, t, a, i, n[c + 2], 9, -51403784),
            i = m(i, o, t, a, n[c + 7], 14, 1735328473),
            a = m(a, i, o, t, n[c + 12], 20, -1926607734),
            // Round 3: 16 steps through A().
            t = A(t, a, i, o, n[c + 5], 4, -378558),
            o = A(o, t, a, i, n[c + 8], 11, -2022574463),
            i = A(i, o, t, a, n[c + 11], 16, 1839030562),
            a = A(a, i, o, t, n[c + 14], 23, -35309556),
            t = A(t, a, i, o, n[c + 1], 4, -1530992060),
            o = A(o, t, a, i, n[c + 4], 11, 1272893353),
            i = A(i, o, t, a, n[c + 7], 16, -155497632),
            a = A(a, i, o, t, n[c + 10], 23, -1094730640),
            t = A(t, a, i, o, n[c + 13], 4, 681279174),
            o = A(o, t, a, i, n[c + 0], 11, -358537222),
            i = A(i, o, t, a, n[c + 3], 16, -722521979),
            a = A(a, i, o, t, n[c + 6], 23, 76029189),
            t = A(t, a, i, o, n[c + 9], 4, -640364487),
            o = A(o, t, a, i, n[c + 12], 11, -421815835),
            i = A(i, o, t, a, n[c + 15], 16, 530742520),
            a = A(a, i, o, t, n[c + 2], 23, -995338651),
            // Round 4: 16 steps through p().
            t = p(t, a, i, o, n[c + 0], 6, -198630844),
            o = p(o, t, a, i, n[c + 7], 10, 1126891415),
            i = p(i, o, t, a, n[c + 14], 15, -1416354905),
            a = p(a, i, o, t, n[c + 5], 21, -57434055),
            t = p(t, a, i, o, n[c + 12], 6, 1700485571),
            o = p(o, t, a, i, n[c + 3], 10, -1894986606),
            i = p(i, o, t, a, n[c + 10], 15, -1051523),
            a = p(a, i, o, t, n[c + 1], 21, -2054922799),
            t = p(t, a, i, o, n[c + 8], 6, 1873313359),
            o = p(o, t, a, i, n[c + 15], 10, -30611744),
            i = p(i, o, t, a, n[c + 6], 15, -1560198380),
            a = p(a, i, o, t, n[c + 13], 21, 1309151649),
            t = p(t, a, i, o, n[c + 4], 6, -145523070),
            o = p(o, t, a, i, n[c + 11], 10, -1120210379),
            i = p(i, o, t, a, n[c + 2], 15, 718787259),
            a = p(a, i, o, t, n[c + 9], 21, -343485551),
            // Add the state saved at the start of the block back in (via f()).
            t = f(t, s),
            a = f(a, l),
            i = f(i, r),
            o = f(o, d)
    }
    return Array(t, a, i, o)
}

/**
 * Shared step used by h(), m(), A() and p(): sums (e + n) and (a + o) with
 * f(), rotates that sum by `i` bits with g(), and adds `t` with f().
 *
 * @param {number} n - Mixed value supplied by the round function.
 * @param {number} e - First addend paired with `n`.
 * @param {number} t - Value added after the rotation.
 * @param {number} a - First addend of the second pair.
 * @param {number} i - Rotation amount passed to g().
 * @param {number} o - Second addend of the second pair.
 * @returns {number} Result of the final f() call.
 */
function u(n, e, t, a, i, o) {
    var firstPair = f(e, n);
    var secondPair = f(a, o);
    var rotated = g(f(firstPair, secondPair), i);
    return f(rotated, t)
}

/**
 * Round function: combines e, t, a as (e AND t) OR (NOT e AND a) and passes
 * the result, with the remaining arguments, to u().
 *
 * @returns {number} Whatever u() returns.
 */
function h(n, e, t, a, i, o, c) {
    var mixed = (e & t) | (~e & a);
    return u(mixed, n, e, i, o, c)
}

/**
 * Round function: combines e, t, a as (e AND a) OR (t AND NOT a) and passes
 * the result, with the remaining arguments, to u().
 *
 * @returns {number} Whatever u() returns.
 */
function m(n, e, t, a, i, o, c) {
    var mixed = (e & a) | (t & ~a);
    return u(mixed, n, e, i, o, c)
}

/**
 * Round function: combines e, t, a as e XOR t XOR a and passes the result,
 * with the remaining arguments, to u().
 *
 * @returns {number} Whatever u() returns.
 */
function A(n, e, t, a, i, o, c) {
    var mixed = e ^ t ^ a;
    return u(mixed, n, e, i, o, c)
}

/**
 * Round function: combines e, t, a as t XOR (e OR NOT a) and passes the
 * result, with the remaining arguments, to u().
 *
 * @returns {number} Whatever u() returns.
 */
function p(n, e, t, a, i, o, c) {
    var mixed = t ^ (e | ~a);
    return u(mixed, n, e, i, o, c)
}

/**
 * Adds two 32-bit integers with wrap-around, working on the low and high
 * 16-bit halves separately and carrying from the low half into the high one.
 *
 * @param {number} n - First addend.
 * @param {number} e - Second addend.
 * @returns {number} Signed 32-bit sum.
 */
function f(n, e) {
    var lowSum = (n & 0xFFFF) + (e & 0xFFFF);
    var highSum = (n >> 16) + (e >> 16) + (lowSum >> 16);
    return (highSum << 16) | (lowSum & 0xFFFF)
}

/**
 * Rotates a 32-bit integer left: the bits shifted out at the top re-enter at
 * the bottom.
 *
 * @param {number} n - Value to rotate.
 * @param {number} e - Number of bit positions.
 * @returns {number} Rotated value as a signed 32-bit integer.
 */
function g(n, e) {
    var shiftedUp = n << e;
    var wrappedAround = n >>> (32 - e);
    return shiftedUp | wrappedAround
}

// console.log(i("12345677"))
// "1678171820099" + "1" + "JzyqgcoojMiQNuQoTlbR5EBT8TsqzJ"
// console.log(i("16781718200991JzyqgcoojMiQNuQoTlbR5EBT8TsqzJ"))

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值