python 翻译模块 翻译API使用(百度、有道、谷歌)

1、翻译模块、api使用分析

1、translate库:使用简单,但是有次数限制,翻译的准确性中等;
2、百度api(推荐使用): 代码简单,有模块,但是需要注册,获取key值,翻译的准确性中下;
3、chrome翻译api(即谷歌翻译的网页接口):代码复杂,有次数限制,但是翻译的准确性较高;
4、有道翻译:代码复杂,有次数限制,准确性中等。

2、使用方式:

1、translate库:
from translate import Translator
# Demo of the `translate` package: translate a Chinese poem into English.
# The same Translator works for any language pair given as from_lang/to_lang.
source_text = "床前明月光,疑是地上霜;举头望明月,低头思故乡"
translator = Translator(
    from_lang="chinese",
    to_lang="english",
)
translation = translator.translate(source_text)
print(translation)

2、百度api:
需要先在百度翻译开放平台注册账号,获取appid、secretKey值,并记得在平台上填写允许访问的服务器ip。

# 百度通用翻译API,不包含词典、tts语音合成等资源,如有相关需求请联系translate_api@baidu.com
# coding=utf-8

import http.client
import hashlib
import urllib
import random
import json

def trans_lang(q, from_lang='en', to_lang='zh'):
    """Translate ``q`` through the Baidu general translation HTTP API.

    Args:
        q: Text to translate.
        from_lang: Source language code sent as the ``from`` parameter
            (defaults to ``'en'``, the value that used to be hard-coded).
        to_lang: Target language code sent as the ``to`` parameter
            (defaults to ``'zh'``, the value that used to be hard-coded).

    Returns:
        The translated text. When the response contains several
        ``trans_result`` segments they are joined with newlines. On any
        failure (network error, malformed response, API error payload) the
        problem is printed and ``q`` is returned unchanged.
    """
    # `import urllib` alone does not guarantee that the `parse` submodule
    # is loaded, so import it explicitly where it is used.
    import urllib.parse

    # Nothing to translate: skip the network round trip entirely.
    if not q or not q.strip():
        return q

    trans_result = q
    # The appid and secret key are issued after registering an account on
    # the Baidu translation open platform.
    appid = 'xxx'  # fill in your appid
    secretKey = 'xxx'  # fill in your secret key

    salt = str(random.randint(32768, 65536))
    # The signature is the MD5 of appid + raw (un-escaped) text + salt + key.
    sign = hashlib.md5((appid + q + salt + secretKey).encode('utf-8')).hexdigest()
    query = urllib.parse.urlencode(
        {
            'appid': appid,
            'q': q,
            'from': from_lang,
            'to': to_lang,
            'salt': salt,
            'sign': sign,
        },
        quote_via=urllib.parse.quote,
    )
    myurl = '/api/trans/vip/translate?' + query  # general translation API path

    httpClient = None
    try:
        # A timeout keeps a stalled connection from blocking the caller forever.
        httpClient = http.client.HTTPConnection('api.fanyi.baidu.com', timeout=10)
        httpClient.request('GET', myurl)
        response = httpClient.getresponse()
        result = json.loads(response.read().decode('utf-8'))
        if 'error_code' in result:
            # Error payloads carry no 'trans_result'; report them readably
            # instead of surfacing a bare KeyError.
            print('Baidu translate error {}: {}'.format(
                result.get('error_code'), result.get('error_msg')))
        else:
            segments = result['trans_result']
            if segments:
                # Return every segment, not only the first one.
                trans_result = '\n'.join(seg['dst'] for seg in segments)
    except (OSError, http.client.HTTPException, ValueError, KeyError, TypeError) as e:
        # Best effort: report the problem and fall back to the original text.
        print(e)
    finally:
        if httpClient:
            httpClient.close()
    return trans_result


if __name__ == '__main__':
    # Demo run: translate one sample English sentence and print the result.
    sample_text = 'Customer Not Available & Mobile not reachable Customer Not Available & Mobile not reachable by SR: ANIL KUMAR (170435) (117510), MobileNo: 9996366909'
    translated = trans_lang(sample_text)
    print(translated)

3、chrome翻译api(谷歌翻译网页接口):

import requests
import re
import json
import time
class GoogleTranslator:
    """Client for the Google Translate web endpoint ``/translate_a/single``.

    Constructing an instance performs an HTTP GET against ``_host`` to read
    the "TKK" seed from the page; each ``translate`` call derives a ``tk``
    request token from that seed and the text, then POSTs the text.

    NOTE(review): this is an unofficial web interface, not a documented API;
    confirm the host and the TKK scheme are still served before relying on it.
    """

    _host = 'translate.google.cn'
    _headers = {
        'Host': _host,
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Mobile Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        # 'br' is deliberately not advertised: requests can only decode
        # Brotli when an optional extra package is installed, and an
        # undecoded body would make the JSON parsing fail.
        'Accept-Encoding': 'gzip, deflate',
        'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
        'Referer': 'https://' + _host,
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0'
    }
    # Accepted language names mapped to the codes sent to the endpoint.
    _language = {
        'afrikaans': 'af',
        'arabic': 'ar',
        'belarusian': 'be',
        'bulgarian': 'bg',
        'catalan': 'ca',
        'czech': 'cs',
        'welsh': 'cy',
        'danish': 'da',
        'german': 'de',
        'greek': 'el',
        'english': 'en',
        'esperanto': 'eo',
        'spanish': 'es',
        'estonian': 'et',
        'persian': 'fa',
        'finnish': 'fi',
        'french': 'fr',
        'irish': 'ga',
        'galician': 'gl',
        'hindi': 'hi',
        'croatian': 'hr',
        'hungarian': 'hu',
        'indonesian': 'id',
        'icelandic': 'is',
        'italian': 'it',
        'hebrew': 'iw',
        'japanese': 'ja',
        'korean': 'ko',
        'latin': 'la',
        'lithuanian': 'lt',
        'latvian': 'lv',
        'macedonian': 'mk',
        'malay': 'ms',
        'maltese': 'mt',
        'dutch': 'nl',
        'norwegian': 'no',
        'polish': 'pl',
        'portuguese': 'pt',
        'romanian': 'ro',
        'russian': 'ru',
        'slovak': 'sk',
        'slovenian': 'sl',
        'albanian': 'sq',
        'serbian': 'sr',
        'swedish': 'sv',
        'swahili': 'sw',
        'thai': 'th',
        'filipino': 'tl',
        'turkish': 'tr',
        'ukrainian': 'uk',
        'vietnamese': 'vi',
        'yiddish': 'yi',
        'chinese_simplified': 'zh-CN',
        'chinese_traditional': 'zh-TW',
        'auto': 'auto'
    }
    _url = 'https://' + _host + '/translate_a/single'
    # Default query parameters; __init__ copies them per instance.
    # The dict previously listed 'dt' ten times; duplicate keys in a dict
    # literal keep only the last value, so the effective setting has always
    # been 'dt': 't'. It is written once here to make that explicit.
    _params = {
            'client': 'webapp',
            'sl': 'en',
            'tl': 'zh-CN',
            'hl': 'zh-CN',
            'dt': 't',
            'otf': '1',
            'ssel': '0',
            'tsel': '0',
            'kc': '1'
    }
    __cookies = None
    # Fallback TKK used until (or unless) a fresh one is fetched.
    __googleTokenKey = '376032.257956'
    # Seconds a fetched TKK is considered valid.
    __googleTokenKeyUpdataTime = 600.0
    __googleTokenKeyRetireTime = time.time() + 600.0

    def __init__(self, src = 'en', dest = 'zh-CN', tkkUpdataTime = 600.0):
        """Configure the language pair and fetch an initial TKK.

        Args:
            src: Source language, as a name from ``_language`` (e.g.
                ``'english'``) or a code (e.g. ``'en'``). Unknown values
                fall back to ``'auto'``.
            dest: Target language, resolved the same way as ``src``.
            tkkUpdataTime: Seconds between TKK refreshes.
        """
        # Work on a private copy so instances with different language pairs
        # do not overwrite each other through the shared class dict.
        self._params = dict(self._params)
        self._params['sl'] = self._resolveLanguage(src)
        self._params['tl'] = self._resolveLanguage(dest)
        # Kept for backward compatibility with code reading this attribute.
        self.googleTokenKeyUpdataTime = tkkUpdataTime
        # This is the attribute the refresh logic actually reads.
        self.__googleTokenKeyUpdataTime = tkkUpdataTime
        self.__updateGoogleTokenKey()

    @classmethod
    def _resolveLanguage(cls, lang):
        """Return the language code for a name or code, or ``'auto'`` if unknown."""
        if lang in cls._language:
            return cls._language[lang]
        if lang in cls._language.values():
            return lang
        return 'auto'

    def __updateGoogleTokenKey(self):
        """Refresh the TKK and push back its expiry time."""
        newKey = self.__getGoogleTokenKey()
        if newKey:
            # Keep the previous key when the fetch failed: an empty key
            # would make the token computation raise.
            self.__googleTokenKey = newKey
        self.__googleTokenKeyRetireTime = time.time() + self.__googleTokenKeyUpdataTime

    def __getGoogleTokenKey(self):
        """Fetch the TKK from the host's front page.

        Also stores the response cookies for later requests.

        Returns:
            The TKK string (e.g. ``'435075.3634891900'``), or ``''`` when
            the request fails or the page does not contain one.
        """
        result = ''
        try:
            res = requests.get('https://' + self._host, timeout = 3)
            res.raise_for_status()
            self.__cookies = res.cookies
            match = re.search(r'tkk\:\'(\d+\.\d+)?\'', res.text)
            if match:
                # The capture group is optional, so it may be None.
                result = match.group(1) or ''
        except requests.exceptions.RequestException as ex:
            print('ERROR: ' + str(ex))
        return result

    def __getGoogleToken(self, a, TKK):
        """Compute the ``tk`` request token for text ``a`` from seed ``TKK``.

        Port of the page's JavaScript routine (described at
        https://www.cnblogs.com/chicsky/p/7443830.html). According to the
        original author, text 'Tablet Developer' with TKK
        '435102.3120524463' yields '315066.159012'.
        """
        def RL(value, ops):
            # `ops` is a sequence of 3-character instructions:
            #   [0] '+' -> add (mod 2**32), otherwise xor
            #   [1] '+' -> shift right, otherwise shift left
            #   [2] shift amount: a digit, or a letter where 'a' means 10
            for pos in range(0, len(ops) - 2, 3):
                amount = ops[pos + 2]
                amount = ord(amount) - 87 if amount >= 'a' else int(amount)
                shifted = value >> amount if ops[pos + 1] == '+' else value << amount
                if ops[pos] == '+':
                    value = (value + shifted) & 4294967295
                else:
                    value ^= shifted
            return value

        # The JavaScript source walks UTF-16 code units and emits the UTF-8
        # bytes of the text. Python strings hold whole code points, so:
        #   * a surrogate pair stored as two items is merged first, and
        #   * characters above U+FFFF are encoded as the 4 bytes they need.
        # 'surrogatepass' lets a lone surrogate through as 3 bytes, which is
        # what the JavaScript produces for it.
        g = []
        f = 0
        while f < len(a):
            c = ord(a[f])
            if (55296 <= c <= 56319 and f + 1 < len(a)
                    and 56320 <= ord(a[f + 1]) <= 57343):
                f += 1
                c = 65536 + ((c & 1023) << 10) + (ord(a[f]) & 1023)
            g.extend(chr(c).encode('utf-8', 'surrogatepass'))
            f += 1

        # Missing or empty parts count as 0, like `Number(x) || 0` in JS.
        e = TKK.split('.')
        h = int(e[0]) if e[0] else 0
        salt = int(e[1]) if len(e) > 1 and e[1] else 0

        t = h
        for item in g:
            t += item
            t = RL(t, '+-a^+6')
        t = RL(t, '+-3^+b+-f')
        # Mask to 32 bits to mirror the JavaScript integer conversion; `t`
        # is therefore never negative and needs no sign correction.
        t ^= salt & 4294967295
        result = t % 1000000
        return str(result) + '.' + str(result ^ h)

    def translate(self, text):
        """Translate ``text`` using the configured language pair.

        Returns:
            The concatenated translated segments, or ``''`` when ``text``
            is empty, the request fails, or the response cannot be parsed
            (the error is printed).
        """
        if not text:
            return ''
        if time.time() > self.__googleTokenKeyRetireTime:
            self.__updateGoogleTokenKey()
        self._params['tk'] = self.__getGoogleToken(text, self.__googleTokenKey)
        try:
            res = requests.post(self._url,
                                headers = self._headers,
                                cookies = self.__cookies,
                                data = {'q': text},
                                params = self._params,
                                timeout = 6)
            res.raise_for_status()
            jsonText = res.text
            if not jsonText:
                return ''
            jsonResult = json.loads(jsonText)
            # jsonResult[0] is read as a list of segments whose first item
            # is the translated text; skip empty/None entries.
            segments = jsonResult[0] or []
            return ''.join(item[0] for item in segments if item and item[0])
        except (requests.exceptions.RequestException,
                ValueError, TypeError, IndexError, KeyError) as ex:
            print('ERROR: ' + str(ex))
            return ''
import time
from GoogleTranslator import GoogleTranslator
def readFile(fileName, encoding=None):
    """Yield the paragraphs of a text file one at a time.

    A paragraph is a run of consecutive non-empty lines; its lines are
    concatenated with their newline characters removed (no separator is
    inserted between them). Empty lines end the current paragraph and are
    never yielded themselves.

    Args:
        fileName: Path of the file to read.
        encoding: Text encoding passed to ``open``. The default ``None``
            keeps the previous behaviour (the platform's default encoding);
            pass e.g. ``'utf-8'`` to read a file independently of the locale.

    Yields:
        Each paragraph as a single string.
    """
    with open(fileName, 'r', encoding=encoding) as f:
        # Collect the lines and join once per paragraph instead of growing
        # a string with repeated concatenation.
        pieces = []
        for line in f:
            if line[0] != '\n':
                pieces.append(line.strip('\n'))
            elif pieces:
                yield ''.join(pieces)
                pieces = []
        # The last paragraph may not be followed by an empty line.
        if pieces:
            yield ''.join(pieces)

4、有道翻译api:

from urllib import request, parse
import json
from faker import Faker

class trans(object):
    """Small client for the Youdao web translation endpoint."""

    def __init__(self):
        self.url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

    @staticmethod
    def _prepare_text(text):
        """Strip tweet decoration from ``text`` before it is translated.

        Everything from the first "http" onwards is dropped, then newlines,
        '#', 'RT ' and ':' are removed.
        """
        cut = text.find("http")
        # find() returns -1 when there is no link; slicing with -1 would
        # silently chop off the last character of the text.
        if cut != -1:
            text = text[:cut]
        return text.replace('\n', '').replace('#', '').replace('RT ', '').replace(':', '')

    def tran(self, text):
        """Translate ``text`` and return the translated string.

        Returns:
            The translation, or ``"Translate failed"`` when the response is
            not the expected JSON structure.

        Raises:
            urllib.error.URLError: if the HTTP request itself fails.
        """
        text = self._prepare_text(text)
        ua = Faker().user_agent()
        headers = {
            'User-Agent': ua,
            'Host': 'fanyi.youdao.com',
            'Origin': 'http://fanyi.youdao.com',
            'Referer': 'http://fanyi.youdao.com/',
        }
        # Form fields posted to the endpoint.
        from_data = {
            'i': text,
            # 'AUTO' requests language auto-detection ('UTO' was a typo).
            'from': 'AUTO',
            'to': 'AUTO',
            'smartresult': 'dict',
            'client': 'fanyideskweb',
            'doctype': 'json',
            'version': '2.1',
            'keyfrom': 'fanyi.web',
            'action': 'FY_BY_REALTlME'
        }
        from_data = parse.urlencode(from_data).encode('utf-8')
        req = request.Request(self.url, from_data, headers)
        # Close the response deterministically and do not wait forever.
        with request.urlopen(req, timeout=10) as resp:
            res = resp.read().decode("utf-8")
        try:
            target = json.loads(res)
            # 'translateResult' is indexed as [paragraph][segment]['tgt'];
            # collect every segment instead of only the first one so longer
            # inputs are not truncated (assumes one entry per sentence —
            # TODO confirm against a live response).
            pieces = [seg['tgt'] for para in target['translateResult'] for seg in para]
        except (ValueError, KeyError, TypeError):
            return "Translate failed"
        if not pieces:
            return "Translate failed"
        return ''.join(pieces)

if __name__ == '__main__':
    # Demo loop: translate the same sample tweet 1000 times, printing the
    # iteration number followed by the result of each call.
    Obj = trans()
    text = "Egypt disinfecting streets in Cairo to combat the spread of Coronavirus.\n#COVID19Africa #CoronavirusPandemic\n https://t…"
    for i in range(1000):
        print(i)
        res = Obj.tran(text)
        print(res)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值