jr某头条新闻爬虫(超详细)

 最近在研究怎么爬jr某头条新闻信息,但jr某头条是一个js动态加载的网站,需要解密参数_signature。话不多说直接上思路步骤。

打开开发者工具进行抓包,发现数据包都存在feed?channel_id里。 

按下 Ctrl+Shift+F 打开全局搜索框,搜索 _signature,发现只有一个 js 文件命中,点进去。

在 js 文件里按 Ctrl+F 搜索 _signature,找到 _signature: a 这一处并打上断点,发现 a 就是 _signature;再往上看有 var a = S(n, e),点进 S() 函数。

 点进去后再打断点,S 函数 return o,而 var o = (null === (n = window.byted_acrawler) || void 0 === n ? void 0 : null === (a = n.sign) || void 0 === a ? void 0 : a.call(n, i)) || ""; 通过观察发现,这个函数其实是调用了 window.byted_acrawler.sign。点进 n.sign() 函数。

 发现这一整个 acrawler.js 文件就是解密 _signature 参数的。将整个 js 代码复制到本地工具里调试,会发现缺少很多浏览器环境对象(window、document、navigator、location),需要自己手动补,也就是最熟悉的“缺什么补什么”。思路:_signature = a --> a = S(n, e) --> 函数 S return o --> o = (null === (n = window.byted_acrawler) || void 0 === n ? void 0 : null === (a = n.sign) || void 0 === a ? void 0 : a.call(n, i)) || "" --> 实际上就是调用 window.byted_acrawler.sign({url: 'https://www.toutiao.com/api/pc/list/feed?channel_id=3189398996&min_behot_time=0&refresh_count=1&category=pc_profile_channel'}) 方法。

注意!!! 

 解密的_signature:

 在 PyCharm 中使用 execjs 库解密 _signature 的 Python 代码:

def get_signature(js_path='jrtt.js'):
    """Compute the ``_signature`` request parameter by running the signing script.

    Reads the JavaScript file at ``js_path``, compiles it with execjs and
    calls the script's ``get_signature`` function.

    Args:
        js_path: Path of the UTF-8 encoded JavaScript file. Defaults to
            ``'jrtt.js'``, the file name that was previously hard-coded.

    Returns:
        Whatever the JavaScript ``get_signature`` function returns.
    """
    # Imported locally so the snippet runs on its own; execjs is the
    # third-party JS bridge this function already relied on.
    import execjs

    with open(js_path, encoding='utf-8') as f:
        jscode = f.read()
    return execjs.compile(jscode).call('get_signature')

js代码:

// The signing script expects a browser: window, document, navigator and
// location are missing under Node, so minimal stand-ins are installed on the
// global object before the script runs.
//
// Object.defineProperty is used instead of bare assignment (`navigator = ...`)
// because bare assignment to an undeclared name throws in strict/module code,
// and may be silently ignored when the runtime already ships a getter-only
// global of the same name (recent Node releases expose their own `navigator`).
(function (root) {
    var stubs = {
        window: root,
        document: {
            referrer: 'https://www.toutiao.com/'
        },
        location: {
            href: 'https://www.toutiao.com/?wid=1635125685888',
            protocol: 'https:'
        },
        navigator: {
            userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36'
        }
    };
    Object.keys(stubs).forEach(function (name) {
        // Same attributes an ordinary global assignment would produce.
        Object.defineProperty(root, name, {
            value: stubs[name],
            writable: true,
            configurable: true,
            enumerable: true
        });
    });
})(global);


var glb;
(glb = "undefined" == typeof window ? global : window)._$jsvmprt = function(b, e, f) {
    function a() {
        if ("undefined" == typeof Reflect || !Reflect.construct)
            return !1;
        if (Reflect.construct.sham)
            return !1;
        if ("function" == typeof Proxy)
            return !0;
        try {
            return Date.prototype.toString.call(Reflect.construct(Date, [], (function() {}
            ))),
            !0
        } catch (b) {
            return !1
        }
    }
    function d(b, e, f) {
        return (d = a() ? Reflect.construct : function(b, e, f) {
            var a = [null];
            a.push.apply(a, e);
            var d = new (Function.bind.apply(b, a));
            return f && c(d, f.prototype),
            d
        }
        ).apply(null, arguments)
    }
    function c(b, e) {
        return (c = Object.setPrototypeOf || function(b, e) {
            return b.__proto__ = e,
            b
        }
        )(b, e)
    }
    function n(b) {
        return function(b) {
            if (Array.isArray(b)) {
                for (var e = 0, f = new Array(b.length); e < b.length; e++)
                    f[e] = b[e];
                return f
            }
        }(b) || function(b) {
            if (Symbol.iterator in Object(b) || "[object Arguments]" === Object.prototype.toString.call(b))
                return Array.from(b)
        }(b) || function() {
            throw new TypeError("Invalid attempt to spread non-iterable instance")
        }()
    }
    for (var i = [], r = 0, t = [], o = 0, l = function(b, e) {
        var f = b[e++]
          , a = b[e]
          , d = parseInt("" + f + a, 16);
        if (d >> 7 == 0)
            return [1, d];
        if (d >> 6 == 2) {
            var c = parseInt("" + b[++e] + b[++e], 16);
            return d &= 63,
            [2, c = (d <<= 8) + c]
        }
        if (d >> 6 == 3) {
            var n = parseInt("" + b[++e] + b[++e], 16)
              , i = parseInt("" + b[++e] + b[++e], 16);
            return d &= 63,
            [3, i = (d <<= 16) + (n <<= 8) + i]
        }
    }, u = function(b, e) {
        var f = parseInt("" + b[e] + b[e + 1], 16);
        return f = f > 127 ? -256 + f : f
    }, s = function(b, e) {
        var f = parseInt("" + b[e] + b[e + 1] + b[e + 2] + b[e + 3], 16);
        return f = f > 32767 ? -65536 + f : f
    }, p = function(b, e) {
        var f = parseInt("" + b[e] + b[e + 1] + b[e + 2] + b[e + 3] + b[e + 4] + b[e + 5] + b[e + 6] + b[e + 7], 16);
        return f = f > 2147483647 ? 0 + f : f
    }, y = function(b, e) {
        return parseInt("" + b[e] + b[e + 1], 16)
    }, v = function(b, e) {
        return parseInt("" + b[e] + b[e + 1] + b[e + 2] + b[e + 3], 16)
    }, g = g || this || window, h = Object.keys || function(b) {
        var e = {}
          , f = 0;
        for (var a in b)
            e[f++] = a;
        return e.length = f,
        e
    }
    , m = (b.length,
    0), I = "", C = m; C < m + 16; C++) {
        var q = "" + b[C++] + b[C];
        q = parseInt(q, 16),
        I += String.fromCharCode(q)
    }
    if ("HNOJ@?RC" != I)
        throw new Error("error magic number " + I);
    m += 16;
    parseInt("" + b[m] + b[m + 1], 16);
    m += 8,
    r = 0;
    for (var w = 0; w < 4; w++) {
        var S = m + 2 * w
          , R = "" + b[S++] + b[S]
          , x = parseInt(R, 16);
        r += (3 & x) << 2 * w
    }
    m += 16,
    m += 8;
    var z = parseInt("" + b[m] + b[m + 1] + b[m + 2] + b[m + 3] + b[m + 4] + b[m + 5] + b[m + 6] + b[m + 7], 16)
      , O = z
      , E = m += 8
      , j = v(b, m += z);
    j[1];
    m += 4,
    i = {
        p: [],
        q: []
    };
    for (var A = 0; A < j; A++) {
        for (var D = l(b, m), T = m += 2 * D[0], $ = i.p.length, P = 0; P < D[1]; P++) {
            var U = l(b, T);
            i.p.push(U[1]),
            T += 2 * U[0]
        }
        m = T,
        i.q.push([$, i.p.length])
    }
    var _ = {
        5: 1,
        6: 1,
        70: 1,
        22: 1,
        23: 1,
        37: 1,
        73: 1
    }
      , k = {
        72: 1
    }
      , M = {
        74: 1
    }
      , H = {
        11: 1,
        12: 1,
        24: 1,
        26: 1,
        27: 1,
        31: 1
    }
      , J = {
        10: 1
    }
      , N = {
        2: 1,
        29: 1,
        30: 1,
        20: 1
    }
      , B = []
      , W = [];
    function F(b, e, f) {
        for (var a = e; a < e + f; ) {
            var d = y(b, a);
            B[a] = d,
            a += 2;
            k[d] ? (W[a] = u(b, a),
            a += 2) : _[d] ? (W[a] = s(b, a),
            a += 4) : M[d] ? (W[a] = p(b, a),
            a += 8) : H[d] ? (W[a] = y(b, a),
            a += 2) : J[d] ? (W[a] = v(b, a),
            a += 4) : N[d] && (W[a] = v(b, a),
            a += 4)
        }
    }
    return K(b, E, O / 2, [], e, f);
    function G(b, e, f, a, c, l, m, I) {
        null == l && (l = this);
        var C, q, w, S = [], R = 0;
        m && (C = m);
        var x, z, O = e, E = O + 2 * f;
        if (!I)
            for (; O < E; ) {
                var j = parseInt("" + b[O] + b[O + 1], 16);
                O += 2;
                var A = 3 & (x = 13 * j % 241);
                if (x >>= 2,
                A < 1) {
                    A = 3 & x;
                    if (x >>= 2,
                    A > 2)
                        (A = x) > 10 ? S[++R] = void 0 : A > 1 ? (C = S[R--],
                        S[R] = S[R] >= C) : A > -1 && (S[++R] = null);
                    else if (A > 1) {
                        if ((A = x) > 11)
                            throw S[R--];
                        if (A > 7) {
                            for (C = S[R--],
                            z = v(b, O),
                            A = "",
                            P = i.q[z][0]; P < i.q[z][1]; P++)
                                A += String.fromCharCode(r ^ i.p[P]);
                            O += 4,
                            S[R--][A] = C
                        } else
                            A > 5 && (S[R] = h(S[R]))
                    } else if (A > 0) {
                        (A = x) > 8 ? (C = S[R--],
                        S[R] = typeof C) : A > 6 ? S[R] = --S[R] : A > 4 ? S[R -= 1] = S[R][S[R + 1]] : A > 2 && (q = S[R--],
                        (A = S[R]).x === G ? A.y >= 1 ? S[R] = K(b, A.c, A.l, [q], A.z, w, null, 1) : (S[R] = K(b, A.c, A.l, [q], A.z, w, null, 0),
                        A.y++) : S[R] = A(q))
                    } else {
                        if ((A = x) > 14)
                            z = s(b, O),
                            (U = function e() {
                                var f = arguments;
                                return e.y > 0 ? K(b, e.c, e.l, f, e.z, this, null, 0) : (e.y++,
                                K(b, e.c, e.l, f, e.z, this, null, 0))
                            }
                            ).c = O + 4,
                            U.l = z - 2,
                            U.x = G,
                            U.y = 0,
                            U.z = c,
                            S[R] = U,
                            O += 2 * z - 2;
                        else if (A > 12)
                            q = S[R--],
                            w = S[R--],
                            (A = S[R--]).x === G ? A.y >= 1 ? S[++R] = K(b, A.c, A.l, q, A.z, w, null, 1) : (S[++R] = K(b, A.c, A.l, q, A.z, w, null, 0),
                            A.y++) : S[++R] = A.apply(w, q);
                        else if (A > 5)
                            C = S[R--],
                            S[R] = S[R] != C;
                        else if (A > 3)
                            C = S[R--],
                            S[R] = S[R] * C;
                        else if (A > -1)
                            return [1, S[R--]]
                    }
                } else if (A < 2) {
                    A = 3 & x;
                    if (x >>= 2,
                    A < 1) {
                        if ((A = x) > 9)
                            ;
                        else if (A > 7)
                            C = S[R--],
                            S[R] = S[R] & C;
                        else if (A > 5)
                            z = y(b, O),
                            O += 2,
                            S[R -= z] = 0 === z ? new S[R] : d(S[R], n(S.slice(R + 1, R + z + 1)));
                        else if (A > 3) {
                            z = s(b, O);
                            try {
                                if (t[o][2] = 1,
                                1 == (C = G(b, O + 4, z - 3, [], c, l, null, 0))[0])
                                    return C
                            } catch (m) {
                                if (t[o] && t[o][1] && 1 == (C = G(b, t[o][1][0], t[o][1][1], [], c, l, m, 0))[0])
                                    return C
                            } finally {
                                if (t[o] && t[o][0] && 1 == (C = G(b, t[o][0][0], t[o][0][1], [], c, l, null, 0))[0])
                                    return C;
                                t[o] = 0,
                                o--
                            }
                            O += 2 * z - 2
                        }
                    } else if (A < 2) {
                        if ((A = x) > 12)
                            S[++R] = u(b, O),
                            O += 2;
                        else if (A > 10)
                            C = S[R--],
                            S[R] = S[R] << C;
                        else if (A > 8) {
                            for (z = v(b, O),
                            A = "",
                            P = i.q[z][0]; P < i.q[z][1]; P++)
                                A += String.fromCharCode(r ^ i.p[P]);
                            O += 4,
                            S[R] = S[R][A]
                        } else
                            A > 6 && (q = S[R--],
                            C = delete S[R--][q])
                    } else if (A < 3) {
                        (A = x) < 2 ? S[++R] = C : A < 4 ? (C = S[R--],
                        S[R] = S[R] <= C) : A < 11 ? (C = S[R -= 2][S[R + 1]] = S[R + 2],
                        R--) : A < 13 && (C = S[R],
                        S[++R] = C)
                    } else {
                        if ((A = x) > 12)
                            S[++R] = l;
                        else if (A > 5)
                            C = S[R--],
                            S[R] = S[R] !== C;
                        else if (A > 3)
                            C = S[R--],
                            S[R] = S[R] / C;
                        else if (A > 1) {
                            if ((z = s(b, O)) < 0) {
                                I = 1,
                                F(b, e, 2 * f),
                                O += 2 * z - 2;
                                break
                            }
                            O += 2 * z - 2
                        } else
                            A > -1 && (S[R] = !S[R])
                    }
                } else if (A < 3) {
                    A = 3 & x;
                    if (x >>= 2,
                    A > 2)
                        (A = x) > 7 ? (C = S[R--],
                        S[R] = S[R] | C) : A > 5 ? (z = y(b, O),
                        O += 2,
                        S[++R] = c["$" + z]) : A > 3 && (z = s(b, O),
                        t[o][0] && !t[o][2] ? t[o][1] = [O + 4, z - 3] : t[o++] = [0, [O + 4, z - 3], 0],
                        O += 2 * z - 2);
                    else if (A > 1) {
                        if ((A = x) < 2) {
                            for (z = v(b, O),
                            C = "",
                            P = i.q[z][0]; P < i.q[z][1]; P++)
                                C += String.fromCharCode(r ^ i.p[P]);
                            S[++R] = C,
                            O += 4
                        } else if (A < 4)
                            if (S[R--])
                                O += 4;
                            else {
                                if ((z = s(b, O)) < 0) {
                                    I = 1,
                                    F(b, e, 2 * f),
                                    O += 2 * z - 2;
                                    break
                                }
                                O += 2 * z - 2
                            }
                        else
                            A < 6 ? (C = S[R--],
                            S[R] = S[R] % C) : A < 8 ? (C = S[R--],
                            S[R] = S[R]instanceof C) : A < 15 && (S[++R] = !1)
                    } else if (A > 0) {
                        (A = x) < 1 ? S[++R] = g : A < 3 ? (C = S[R--],
                        S[R] = S[R] + C) : A < 5 ? (C = S[R--],
                        S[R] = S[R] == C) : A < 14 && (C = S[R - 1],
                        q = S[R],
                        S[++R] = C,
                        S[++R] = q)
                    } else {
                        (A = x) < 2 ? (C = S[R--],
                        S[R] = S[R] > C) : A < 9 ? (z = v(b, O),
                        O += 4,
                        q = R + 1,
                        S[R -= z - 1] = z ? S.slice(R, q) : []) : A < 11 ? (z = y(b, O),
                        O += 2,
                        C = S[R--],
                        c[z] = C) : A < 13 ? (C = S[R--],
                       
  • 10
    点赞
  • 24
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值