百度在线翻译爬虫

参考:https://blog.csdn.net/zhu6201976/article/details/98262497

class BaiDuFanYi:
    """Small client for the Baidu Translate web endpoint (``v2transapi``).

    On construction the client requests the Baidu Translate home page to
    collect the cookies, the ``gtk`` seed and the ``token`` that the endpoint
    expects. ``translate`` then signs each query by running Baidu's signing
    JavaScript through ``js2py`` and posts the form with a
    ``requests.Session``.

    The module-level names ``requests``, ``re`` and ``js2py`` must be
    importable where this class is defined.
    """

    _HOME_URL = "https://fanyi.baidu.com/"
    _TRANSLATE_URL = "https://fanyi.baidu.com/v2transapi"

    # Expression inside the signing script that reads the gtk seed from the
    # browser's ``window`` object; it is swapped for the literal seed value.
    _GTK_PLACEHOLDER = 'null !== i ? i : (i = window[l] || "") || ""'

    # Baidu's signing routine. ``e(r)`` returns the ``sign`` string for query
    # ``r``. Compared with the script as originally pasted, the surrogate-pair
    # branch no longer wraps ``e[C].split("")`` in a call to ``a(...)``: no
    # function ``a`` is defined in this script, so that branch would raise a
    # ReferenceError, and ``split`` already yields the array to push.
    _SIGN_JS = r"""
            function n(r, o) {
                for (var t = 0; t < o.length - 2; t += 3) {
                var a = o.charAt(t + 2);
                a = a >= "a" ? a.charCodeAt(0) - 87 : Number(a), a = "+" === o.charAt(t + 1) ? r >>> a : r << a, r = "+" === o.charAt(t) ? r + a & 4294967295 : r ^ a
                }
                return r
            }
            function e(r) {
                var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
                if (null === o) {
                    var t = r.length;
                    t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10))
                } else {
                    for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++) "" !== e[C] && f.push.apply(f, e[C].split("")), C !== h - 1 && f.push(o[C]);
                    var g = f.length;
                    g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join(""))
                }
                var u = void 0, l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
                u = null !== i ? i : (i = window[l] || "") || "";
                for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {
                    var A = r.charCodeAt(v);
                    128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)), S[c++] = A >> 18 | 240, S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224, S[c++] = A >> 6 & 63 | 128), S[c++] = 63 & A | 128)
                }
                for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++) p += S[b], p = n(p, F);
                return p = n(p, D), p ^= s, 0 > p && (p = (2147483647 & p) + 2147483648), p %= 1e6, p.toString() + "." + (p ^ m)
            }"""

    def __init__(self):
        """Fetch cookies/gtk/token and prepare the HTTP session.

        Raises:
            RuntimeError: if the home page cannot be fetched with status 200
                or does not contain the expected ``gtk`` / ``token`` values.
        """
        self.user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36'
        self.cookies = None
        self.gtk = None
        self.token = None
        self.__get_init_cookies()
        self.headers = {"Content-Type": "application/x-www-form-urlencoded",
                        "User-Agent": self.user_agent}
        self.session = requests.Session()
        self.session.headers.update(self.headers)
        self.session.cookies.update(self.cookies)
        # Human-readable language name -> language code used by the endpoint.
        self.language = {"中文": "zh",
                         "英文": "en",
                         "韩文": "kor"}

    @staticmethod
    def _check_status(res):
        """Raise ``RuntimeError`` unless ``res`` has HTTP status 200."""
        if res.status_code != 200:
            raise RuntimeError(
                "Baidu Translate request failed with HTTP status {}".format(
                    res.status_code))

    @staticmethod
    def _first_match(pattern, text, what):
        """Return the first ``re.findall`` hit of ``pattern`` in ``text``.

        Raises:
            RuntimeError: if the pattern does not occur; ``what`` names the
                missing value in the error message.
        """
        found = re.findall(pattern, text)
        if not found:
            raise RuntimeError(
                "could not find {} in the Baidu Translate home page".format(what))
        return found[0]

    def __get_init_cookies(self):
        """Populate ``self.cookies``, ``self.gtk`` and ``self.token``.

        The home page is requested twice: the first response supplies the
        cookies and the gtk seed, the second request (sent with those
        cookies) supplies the token.
        """
        headers = {'User-Agent': self.user_agent}
        # NOTE(review): verify=False disables TLS certificate verification;
        # kept as in the original code, but consider enabling verification.
        res = requests.get(self._HOME_URL, headers=headers, verify=False)
        # Validate the response before trying to parse it.
        self._check_status(res)
        self.cookies = res.cookies
        # The capture keeps the surrounding quotes so the value can be pasted
        # into the signing script as a JavaScript string literal.
        self.gtk = self._first_match(r";window.gtk = ('.*?');", res.text, "gtk")
        res = requests.get(self._HOME_URL, headers=headers,
                           cookies=self.cookies, verify=False)
        self._check_status(res)
        self.token = self._first_match(r"token: '(.*?)'", res.text, "token")

    def __get_sign(self, keyword):
        """Return the ``sign`` value for ``keyword`` computed by the JS routine."""
        # Substitute the gtk seed into the script.
        js = self._SIGN_JS.replace(self._GTK_PLACEHOLDER, self.gtk)
        # Execute the script to define the signing function e(r).
        context = js2py.EvalJs()
        context.execute(js)
        # Call e(r) on the query text.
        return context.e(keyword)

    def translate(self, keyword, from_language, to_language):
        """Translate ``keyword`` and return the endpoint's decoded JSON reply.

        Args:
            keyword: text to translate.
            from_language: source language; a key of ``self.language``.
            to_language: target language; a key of ``self.language``.

        Returns:
            The parsed JSON body of the response.

        Raises:
            KeyError: if either language is not a key of ``self.language``.
            RuntimeError: if the endpoint does not answer with status 200.
        """
        from_language = self.language[from_language]
        to_language = self.language[to_language]
        sign = self.__get_sign(keyword)
        data = {
                'from': from_language,
                'to': to_language,
                'query': keyword,
                'transtype': 'translang',
                'simple_means_flag': 3,
                'sign': sign,  # must be computed by the signing routine
                'token': self.token,  # scraped from the home page
                'domain': 'common'
                }
        # The query string carries the same language pair as the form body
        # (previously it was hard-coded to from=zh&to=en).
        res = self.session.post(self._TRANSLATE_URL,
                                params={'from': from_language, 'to': to_language},
                                data=data, verify=False)
        self._check_status(res)
        return res.json()
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值