百度翻译
-
baidufanyi.py
import requests
import re
import json
import execjs
# Landing page; fetched once per translation to scrape the session token.
URL = 'https://fanyi.baidu.com/?aldtype=16047#zh/en/'
# Main translate endpoint; requires matching 'token' and 'sign' form fields.
TRANSLATE_API = 'https://fanyi.baidu.com/v2transapi'
# Alternative "realtime" endpoint; not used by the active request path.
REALTRANSLATE_API = 'https://fanyi.baidu.com/transapi'
# Browser-like headers without a Cookie; not used by the active request path.
HEADERS = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0',
'Accept': '*/*',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Connection': 'keep-alive',
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
'Host': 'fanyi.baidu.com',
'Origin': 'https://fanyi.baidu.com',
'Referer': 'https://fanyi.baidu.com/',
'X-Requested-With': 'XMLHttpRequest',
}
# Same headers plus a captured Cookie string (BAIDUID etc.); used for both
# the token scrape and the translate POST.
HEADERS2 = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0', 'Accept': '*/*', 'Accept-Language': 'zh-CN,zh;q=0.9', 'Connection': 'keep-alive', 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', 'Host': 'fanyi.baidu.com', 'Origin': 'https://fanyi.baidu.com', 'Referer': 'https://fanyi.baidu.com/', 'X-Requested-With': 'XMLHttpRequest', 'Cookie': 'BAIDUID=BEA2658FC962DF6CA0C053E5690C1934:FG=1; locale=zh; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1540531940; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1540531984; from_lang_often=%5B%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%2C%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%5D; to_lang_often=%5B%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%2C%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%5D'}
# Cookie fragment without BAIDUID, for building a Cookie header by hand
# (appended to the session's own cookies); not used by the active path.
Cookie = 'REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1540531940; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1540531984; from_lang_often=%5B%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%2C%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%5D; to_lang_often=%5B%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%2C%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%5D'
class fanYi:
    """Baidu Translate scraper.

    Workflow per query: fetch the landing page to scrape the session
    'token', run Baidu's own sign JS (baidufanyi.js, function ``e``) on
    the query text to get 'sign', then POST both to the v2transapi
    endpoint and print the first translation.
    """

    def __init__(self):
        # A Session keeps the cookies Baidu sets on the first GET; the
        # token/sign pair is only accepted together with those cookies.
        self._session = requests.session()
        self._data = {
            'from': 'en',
            'to': 'zh',
            'query': '',
            'transtype': 'realtime',
            'simple_means_flag': '3',
            'sign': '',
            'token': ''
        }

    def _set_words(self, words):
        # Text to translate; also the input to the sign algorithm.
        self._words = words

    def _get_token(self):
        """Scrape the per-session token from the page's inline JS blob."""
        response = self._session.get(URL, headers=HEADERS2)
        html = response.text
        li = re.search(r"<script>\s*window\[\'common\'\] = ([\s\S]*?)</script>", html)
        token = re.search(r"token: \'([a-zA-Z0-9]+)\',", li.group(1))
        self._data['token'] = token.group(1)

    def _get_sign(self):
        """Compute the anti-scraping 'sign' by executing Baidu's JS."""
        # Explicit encoding so the script reads identically on any platform.
        with open('baidufanyi.js', encoding='utf-8') as f:
            js = f.read()
        self._data['sign'] = execjs.compile(js).call('e', self._words)

    def _translate(self):
        """Refresh token/sign, POST the query, and print the translation."""
        self._get_token()
        self._get_sign()
        self._data['query'] = self._words
        response = self._session.post(TRANSLATE_API, data=self._data, headers=HEADERS2)
        # requests decodes JSON (and charset) for us; no manual decode needed.
        result = response.json()
        print(result['trans_result']['data'][0]['dst'])
if __name__ == "__main__":
    translator = fanYi()
    # Translate one line of stdin per iteration, forever (Ctrl-C to quit).
    while True:
        translator._set_words(input())
        translator._translate()
-
baidufanyi.js
// Hard-coded "gtk" seed (normally read from window.gtk on the page); the
// two dot-separated numbers seed the sign hash computed by e().
var i = "320305.131321201"
function a(r){if(Array.isArray(r)){for(var o=0,t=Array(r.length);o<r.length;o++)t[o]=r[o];
return t}return Array.from(r)}
function n(r,o){for(var t=0;t<o.length-2;t+=3){var a=o.charAt(t+2);a=a>="a"?a.charCodeAt(0)-87:Number(a),a="+"===o.charAt(t+1)?r>>>a:r<<a,r="+"===o.charAt(t)?r+a&4294967295:r^a
}return r}
// Baidu's sign algorithm (annotated, code untouched): hashes the query
// string r together with the two numbers of the gtk seed (var i, "m.s")
// into the "NNNNNN.M" string sent as the 'sign' request parameter.
function e(r) {
// Astral characters (emoji etc.) appear as surrogate pairs; detect them
// so the 30-char truncation below counts each pair as one character.
var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
if (null === o) {
var t = r.length;
// BMP-only text longer than 30 chars: keep first 10 + middle 10 + last 10.
t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr( - 10, 10))
} else {
// Split around surrogate pairs, flatten to an array of "characters"
// (pairs kept whole via o[C]), then apply the same 10/10/10 truncation.
for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++)"" !== e[C] && f.push.apply(f, a(e[C].split(""))),
C !== h - 1 && f.push(o[C]);
var g = f.length;
g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice( - 10).join(""))
}
var u = void 0,
// l spells "gtk" without the literal string appearing in the source.
l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
// Prefer the hard-coded seed i; fall back to window.gtk when i is null.
u = null !== i ? i: (i = window[l] || "") || "";
// m and s are the two dot-separated numbers of the gtk value.
for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {
var A = r.charCodeAt(v);
// Manual UTF-8 encoder: fills byte array S from the (truncated) query.
128 > A ? S[c++] = A: (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)), S[c++] = A >> 18 | 240, S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224, S[c++] = A >> 6 & 63 | 128), S[c++] = 63 & A | 128)
}
// F decodes to "+-a^+6" and D to "+-3^+b+-f": op strings for n() above.
// Accumulate every UTF-8 byte into p, mixing with F after each one.
for (var p = m,
F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++) p += S[b],
p = n(p, F);
// Final mix with D, XOR with s, force non-negative, reduce mod 1e6,
// and append "." plus a checksum (p XOR m).
return p = n(p, D),
p ^= s,
0 > p && (p = (2147483647 & p) + 2147483648),
p %= 1e6,
p.toString() + "." + (p ^ m)
}
有道翻译
单纯翻译:
__author__ = 'hugowen'
# -*- coding:utf-8 -*-
'''
[33]python Web 框架:Tornado
https://blog.csdn.net/xc_zhou/article/details/80637714
https://pypi.org/project/tornado/
'''
from bs4 import BeautifulSoup
import tornado.httpclient
def is_chinese(uchar):
    """Return True if ``uchar`` lies in the CJK Unified Ideographs range
    U+4E00..U+9FA5.

    Intended for a single character; a longer string is compared
    lexicographically, which effectively tests its first character.
    """
    # Chained comparison is equivalent to the original and-ed pair.
    return u'\u4e00' <= uchar <= u'\u9fa5'
if __name__ == "__main__":
    cli = tornado.httpclient.HTTPClient()
    link = 'http://dict.youdao.com/search?q='
    search = input('search: ')
    link += search
    data = cli.fetch(link)
    body = data.body
    # Name the parser explicitly: the sibling script already uses
    # "html.parser", and omitting it makes bs4 guess (and warn).
    soup = BeautifulSoup(body, "html.parser")
    group = soup.find_all(class_='trans-container')
    if is_chinese(search):
        # Chinese query: first <span> holds the phonetic text, the
        # contentTitle anchors hold the English translations.
        content = group[0].find('ul').find('p')
        print(content.find_all('span')[0].get_text())
        for ele in content.find_all(class_='contentTitle'):
            print(ele.find('a').get_text())
    else:
        # Non-Chinese query: each <li> is one Chinese sense.
        content = group[0].find('ul').find_all('li')
        for ele in content:
            print(ele.get_text())
    # Release the client's resources (the original leaked it).
    cli.close()
翻译并记录翻译日志:输出到 words.md 文件中
__author__ = 'hugowen'
# -*- coding:utf-8 -*-
'''
[33]python Web 框架:Tornado
https://blog.csdn.net/xc_zhou/article/details/80637714
https://pypi.org/project/tornado/
'''
from bs4 import BeautifulSoup
import tornado.httpclient
def is_chinese(uchar):
    """Return True if ``uchar`` lies in the CJK Unified Ideographs range
    U+4E00..U+9FA5.

    Intended for a single character; a longer string is compared
    lexicographically, which effectively tests its first character.
    """
    # Chained comparison is equivalent to the original and-ed pair.
    return u'\u4e00' <= uchar <= u'\u9fa5'
def translate(search):
    """Look up ``search`` on dict.youdao.com and return its translations.

    For a Chinese query, prints the phonetic text and returns the English
    translations; otherwise returns each Chinese sense as one string.
    """
    cli = tornado.httpclient.HTTPClient()
    try:
        data = cli.fetch('http://dict.youdao.com/search?q=' + search)
    finally:
        # Release the client's resources even if the fetch fails
        # (the original leaked a client per call).
        cli.close()
    soup = BeautifulSoup(data.body, "html.parser")
    group = soup.find_all(class_='trans-container')
    result = []
    if is_chinese(search):
        content = group[0].find('ul').find('p')
        # Phonetic text is printed, not returned, so words.md entries
        # contain only translations.
        print(content.find_all('span')[0].get_text())
        for ele in content.find_all(class_='contentTitle'):
            result.append(ele.find('a').get_text())
    else:
        for ele in group[0].find('ul').find_all('li'):
            result.append(ele.get_text())
    return result
if __name__ == "__main__":
    while True:
        search = input('search: ')
        # 'Q' or 'q' quits the lookup loop.
        if search in ('Q', 'q'):
            break
        result = translate(search)
        # Explicit utf-8 so Chinese text is appended correctly regardless
        # of the platform's default encoding (e.g. cp936 on Windows).
        with open('words.md', 'a', encoding='utf-8') as f:
            f.write('### ▌' + search + '\n')
            for r in result:
                f.write('- [ ] ' + r + '\n')
                print('▌> ' + r)