python (用js加密)爬取百度翻译

1.进入百度翻译页面,用f12或右键检查,点击网络(network)刷新,在输入区输入你想要翻译的内容,

   再一次刷新页面

2. 将响应的数据复制粘贴到JSON在线解析及格式化验证中,查看有没有我们要的值

 

3. 点击标头(Headers),获取翻译请求的URL

post_url= 'https://fanyi.baidu.com/v2transapi?from=en&to=zh'

 

4. 查看表单参数(在输入区重新输入一个单词),对比前后两次请求的表单参数

                                                    

  查看后我们发现query就是我们要翻译的单词,token,domain的值没有改变,只有sign的值发生了改变。

 

5.现在我们就要分析sign的值是咋来的

        

点击“{}”格式化

 

"Ctrl+F"输入sign找跟表单相似的,打个断点刷新页面

                           

sign是js加密的,再打个断点,刷新界面,将这段代码复制粘贴到PyCharm中

运行这些代码,可以发现没有i的值跟n的值,我们继续调试js找到i跟n的值

这是js加密的代码

// Seed string of the form "<m>.<s>": e() splits it on "." and uses both numeric
// halves when computing the sign. e() only falls back to window.gtk when this is null.
var i="320305.131321201"
/**
 * Scramble the number `r` with the operations encoded in `o`.
 *
 * `o` is read three characters at a time (a trailing partial group is ignored):
 *   - 1st char: "+" adds the shifted value (result coerced to 32 bits),
 *               anything else XORs it in;
 *   - 2nd char: "+" means unsigned right shift, anything else means left shift;
 *   - 3rd char: the shift amount, a digit or a letter ("a" = 10, "b" = 11, ...).
 *
 * @param {number} r  starting value
 * @param {string} o  operation string, e.g. "+-a^+6"
 * @returns {number}  the scrambled value
 */
function n(r, o) {
    var acc = r;
    for (var pos = 0; pos + 2 < o.length; pos += 3) {
        var amountChar = o.charAt(pos + 2);
        // Letters encode amounts >= 10: "a".charCodeAt(0) - 87 === 10.
        var amount = amountChar >= "a" ? amountChar.charCodeAt(0) - 87 : Number(amountChar);

        var shifted;
        if (o.charAt(pos + 1) === "+") {
            shifted = acc >>> amount;
        } else {
            shifted = acc << amount;
        }

        if (o.charAt(pos) === "+") {
            acc = (acc + shifted) & 4294967295;
        } else {
            acc = acc ^ shifted;
        }
    }
    return acc;
}
/**
 * Compute the `sign` value for the text `r`.
 *
 * Steps:
 *   1. If the text is longer than 30 units, keep only the first 10, the middle 10
 *      and the last 10 (surrogate pairs are counted as one unit each).
 *   2. Read the seed "<m>.<s>" from the global `i` (falling back to window.gtk
 *      only when `i` is null).
 *   3. Encode the (possibly shortened) text as UTF-8 bytes.
 *   4. Fold every byte into an accumulator starting at m, scrambling with n().
 *   5. Apply a final scramble, XOR with s, force non-negative, reduce modulo 1e6.
 *
 * @param {string} r  text to sign
 * @returns {string}  "<p>.<p ^ m>"
 */
function e(r) {
    var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
    if (null === o) {
        // No surrogate pairs: sample directly on the string.
        var t = r.length;
        t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10))
    } else {
        // Rebuild the text as a list in which each surrogate pair is ONE element,
        // so the 10/10/10 sampling below never cuts a pair in half.
        // Fix: the original wrapped the split result in an undefined helper `a(...)`,
        // which threw a ReferenceError here; split("") already returns an array.
        for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++)
            "" !== e[C] && f.push.apply(f, e[C].split("")),
            C !== h - 1 && f.push(o[C]);
        var g = f.length;
        g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join(""))
    }
    // l === "gtk"; it is only used as the window property name for the fallback seed.
    var u = void 0
        , l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
    u = null !== i ? i : (i = window[l] || "") || "";
    // d = seed halves, m/s = their numeric values, S = UTF-8 bytes of r.
    for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {
        var A = r.charCodeAt(v);
        128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)),
            S[c++] = A >> 18 | 240,
            S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224,
            S[c++] = A >> 6 & 63 | 128),
            S[c++] = 63 & A | 128)
    }
    // F === "+-a^+6" (per-byte scramble), D === "+-3^+b+-f" (final scramble).
    for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++)
        p += S[b],
            p = n(p, F);
    // Final mix: scramble with D, XOR with s, map negatives into the unsigned
    // 32-bit range, keep the low six decimal digits, then append "." + (p ^ m).
    return p = n(p, D),
        p ^= s,
    0 > p && (p = (2147483647 & p) + 2147483648),
        p %= 1e6,
    p.toString() + "." + (p ^ m)
}

现在我们得到了sign的参数,然后我们继续在js中找token的值,发现在js中找不到token的值,

接下来我们到百度翻译原页面的元素(页面源码)中查找,就能找到token的值

现在我们就得到表单里所有的值,开始写代码

import re
import execjs
import requests


class BaiDu:
    """Translate a piece of text through Baidu Translate's ``v2transapi`` endpoint.

    The request needs two computed form fields: ``sign``, produced by running
    the JavaScript function ``e`` from a local file through ``execjs``, and
    ``token``, scraped from the Baidu Translate home page.

    Args:
        query: Text to translate.
        from_lang: Source language code sent in the form data (default ``'zh'``).
        to_lang: Target language code sent in the form data (default ``'en'``).
        js_path: Path of the JavaScript file defining ``e`` (default ``"555.js"``).
        timeout: Seconds to wait for each HTTP request before giving up.
    """

    # Captures whatever sits between " token:" and "systime:" inside a <script>.
    _TOKEN_PATTERN = re.compile(r"<script>.*? token:(.*?)systime:.*?", re.S)

    def __init__(self, query, *, from_lang='zh', to_lang='en',
                 js_path="555.js", timeout=10):
        self.query = query
        self.from_lang = from_lang
        self.to_lang = to_lang
        self.js_path = js_path
        self.timeout = timeout
        self.token_url = 'https://fanyi.baidu.com'
        # NOTE(review): the query string says from=glg&to=zh while the form data
        # defaults to zh -> en; left unchanged because the server-side precedence
        # cannot be confirmed from this file.
        self.json_url = "https://fanyi.baidu.com/v2transapi?from=glg&to=zh"
        # NOTE(review): the Cookie is a hard-coded browser session and will
        # presumably expire; replace it with a fresh one if requests start failing.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36',
            'Cookie': 'BIDUPSID=614E2F635CDC210CDD3EC692062C48E1; PSTM=1615357219; BAIDUID=5D6010D788F6A92D3EFF7D30EF3FC978:FG=1; __yjs_duid=1_be93ea5b0a723764fe483f12891e547c1616027303752; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_PREFER_SWITCH=1; SOUND_SPD_SWITCH=1; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; H_PS_PSSID=33359_33273_31660_33692_33595_33392_33713_26350; delPer=0; PSINO=2; BCLID=10915145684757159273; BDSFRCVID=fIkOJexroG3VC5QeSbUkhXPMuFweG7bTDYLEOwXPsp3LGJLVJeC6EG0Pts1-dEu-EHtdogKK0gOTH6KF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF=tR3aQ5rtKRTffjrnhPF3bxFfXP6-hnjy3b7p5K5l5xODSJvdWh3Y5fKWbttf5q3RymJ42-39LPO2hpRjyxv4y4Ldj4oxJpOJ-bCL0p5aHl51fbbvbURvD--g3-AqBM5dtjTO2bc_5KnlfMQ_bf--QfbQ0hOhqP-jBRIE3-oJqCDbhDP43H; BCLID_BFESS=10915145684757159273; BDSFRCVID_BFESS=fIkOJexroG3VC5QeSbUkhXPMuFweG7bTDYLEOwXPsp3LGJLVJeC6EG0Pts1-dEu-EHtdogKK0gOTH6KF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF_BFESS=tR3aQ5rtKRTffjrnhPF3bxFfXP6-hnjy3b7p5K5l5xODSJvdWh3Y5fKWbttf5q3RymJ42-39LPO2hpRjyxv4y4Ldj4oxJpOJ-bCL0p5aHl51fbbvbURvD--g3-AqBM5dtjTO2bc_5KnlfMQ_bf--QfbQ0hOhqP-jBRIE3-oJqCDbhDP43H; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1616139414,1616402109; __yjsv5_shitong=1.0_7_473759471dbf74795cb53f7650ecabf4bd60_300_1616402109494_124.114.149.35_43dbd720; ab_sr=1.0.0_Y2U2ZjFiYzRjYjVlZmFmNjQ4YzIxMzM2YWZkMjk0MTM2ZmIxMjdkMDgyZTk5MWZkNWZiM2I4MTU0NDFiMTkxZWY1ZTY4ZmM4MGZhZTA4ZDU3ZDY4NWZlNjhiNzRjNWQ2; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1616406106',
        }

    def __call__(self, *args, **kwargs):
        """Run the translation; positional and keyword arguments are ignored.

        Returns:
            The translated text, which is also printed by :meth:`get_json`.
        """
        return self.get_json()

    def read_js(self, path):
        """Return the full text of the file at ``path``.

        The file is decoded as UTF-8 explicitly so the result does not depend
        on the platform's default encoding.
        """
        with open(path, "r", encoding="utf-8") as f:
            return f.read()

    def sign(self):
        """Return the ``sign`` form field for ``self.query``.

        Compiles the JavaScript in ``self.js_path`` and calls its function
        ``e`` with the query text.
        """
        context = execjs.compile(self.read_js(self.js_path))
        return context.call("e", self.query)

    @staticmethod
    def _parse_token(html):
        """Extract the token from page HTML; return ``""`` when none is found."""
        match = BaiDu._TOKEN_PATTERN.search(html)
        if match is None:
            return ""
        # The capture looks like " 'abc123',\n"; drop whitespace, quotes, comma.
        return match.group(1).strip().strip("',")

    def token(self):
        """Fetch the Baidu Translate home page and return its embedded token.

        Returns:
            The token string, or ``""`` if the page contains no match.
        """
        html = requests.get(
            self.token_url, headers=self.headers, timeout=self.timeout
        ).text
        return self._parse_token(html)

    def get_json(self):
        """POST the translation request, print the result and return it.

        Returns:
            The ``dst`` text of the first entry in ``trans_result.data``.

        Raises:
            KeyError / IndexError: if the response JSON lacks that structure.
        """
        sign = self.sign()
        token = self.token()
        data = {
            'from': self.from_lang,
            'to': self.to_lang,
            'query': self.query,
            'simple_means_flag': '3',
            'sign': sign,
            'token': token,
            'domain': 'common',
        }
        json_data = requests.post(
            self.json_url, headers=self.headers, data=data, timeout=self.timeout
        ).json()
        pos = json_data['trans_result']["data"][0]['dst']
        print(pos)
        return pos


if __name__ == '__main__':
    # Ask for the text to translate, build the translator and invoke it once.
    translator = BaiDu(input("请输入你要翻译的内容"))
    translator()

 

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 4
    评论
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值