谷歌翻译不提供可直接调用的免费接口。若想使用谷歌的翻译结果,需要借助爬虫技术:将待翻译的文本传入,抓取页面,并解析出翻译结果。经测试,这种方法的翻译效果较差,不建议使用。
百度翻译提供接口:在百度翻译注册成为开发者后,即可获取 appid 与 secretKey,并直接在 Python 中调用。这种方法的翻译效果较好,但仍存在一些问题;对于翻译得不太好的文本,只能手动复制到谷歌翻译或必应翻译中进行翻译。
#-*-coding:utf-8-*-
# date: 2018-11-07
import requests
import json
import execjs
import hashlib
import urllib
import random
# PyExecJS must be installed first (pip install pyexecjs); it is used to run the JavaScript snippet.
class Py4Js():
    """Compute the Google Translate ``tk`` request token by running JavaScript.

    The token algorithm is kept as JavaScript source and executed through
    PyExecJS. The script is compiled only once, the first time an instance is
    created, and the compiled context is then shared by every instance, so
    code that builds a new ``Py4Js`` per request does not recompile it.

    Attributes:
        ctx: The compiled execjs context exposing the ``TL`` function.
    """

    # JavaScript source of the token algorithm (kept verbatim).
    # TL(a): expands the string into UTF-8 bytes, folds each byte into an
    # accumulator seeded with 406644 through RL(), XORs the result with
    # 3293161072, makes it non-negative, reduces it modulo 1E6 and returns
    # "<n>.<n ^ 406644>".
    # RL(a, b): walks b three characters at a time; the third character is the
    # shift amount, the second picks unsigned right shift ("+") or left shift,
    # and the first picks 32-bit masked addition ("+") or XOR.
    _TK_SCRIPT = """
function TL(a) {
var k = "";
var b = 406644;
var b1 = 3293161072;
var jd = ".";
var $b = "+-a^+6";
var Zb = "+-3^+b+-f";
for (var e = [], f = 0, g = 0; g < a.length; g++) {
var m = a.charCodeAt(g);
128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : (55296 == (m & 64512) && g + 1 < a.length && 56320 == (a.charCodeAt(g + 1) & 64512) ? (m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023),
e[f++] = m >> 18 | 240,
e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224,
e[f++] = m >> 6 & 63 | 128),
e[f++] = m & 63 | 128)
}
a = b;
for (f = 0; f < e.length; f++) a += e[f],
a = RL(a, $b);
a = RL(a, Zb);
a ^= b1 || 0;
0 > a && (a = (a & 2147483647) + 2147483648);
a %= 1E6;
return a.toString() + jd + (a ^ b)
};
function RL(a, b) {
var t = "a";
var Yb = "+";
for (var c = 0; c < b.length - 2; c += 3) {
var d = b.charAt(c + 2),
d = d >= t ? d.charCodeAt(0) - 87 : Number(d),
d = b.charAt(c + 1) == Yb ? a >>> d: a << d;
a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d
}
return a
}
"""

    # Compiled context shared by all instances; filled in lazily so that
    # importing this module never starts a JavaScript runtime.
    _compiled_ctx = None

    def __init__(self):
        """Bind ``self.ctx`` to the compiled script, compiling it on first use."""
        if Py4Js._compiled_ctx is None:
            Py4Js._compiled_ctx = execjs.compile(Py4Js._TK_SCRIPT)
        self.ctx = Py4Js._compiled_ctx

    def getTk(self, text):
        """Return the ``tk`` value that matches *text*.

        The ``tk`` request parameter of Google Translate changes with the text
        being translated and is generated by JavaScript, so this calls the
        script's ``TL`` function and returns its result.
        """
        return self.ctx.call("TL", text)
def buildUrl(text, tk, from_lang='auto', to_lang='zh-CN'):
    """Build the GET URL for the Google Translate ``translate_a/single`` endpoint.

    Args:
        text: Text to translate. It is percent-encoded before being placed in
            the query string, so ``&``, ``#``, spaces and non-ASCII characters
            are safe.
        tk: Token computed for *text* (see ``Py4Js.getTk``); converted with
            ``str``.
        from_lang: Source language code; ``'auto'`` requests auto-detection.
        to_lang: Target language code.

    Returns:
        The complete request URL as a string.
    """
    from urllib.parse import quote, urlencode

    # The language parameters are spelled with the letter "l" (sl / tl / hl);
    # the digit-one spellings used previously are not names the endpoint
    # knows, so the intended language settings never reached the server.
    params = [
        ('client', 't'),
        ('sl', from_lang),
        ('tl', to_lang),
        ('hl', 'zh-CN'),
        ('dt', 'at'),
        ('dt', 'bd'),
        ('dt', 'ex'),
        ('dt', 'ld'),
        ('dt', 'md'),
        ('dt', 'qca'),
        ('dt', 'rw'),
        ('dt', 'rm'),
        ('dt', 'ss'),
        ('dt', 't'),
        ('ie', 'UTF-8'),
        ('oe', 'UTF-8'),
        ('otf', '1'),
        ('pc', '1'),
        ('ssel', '0'),
        ('tsel', '0'),
        ('kc', '2'),
        ('tk', str(tk)),
        ('q', text),
    ]
    # A list of pairs keeps the repeated "dt" keys and their order;
    # quote_via=quote encodes spaces as %20 instead of "+".
    return ('https://translate.google.cn/translate_a/single?'
            + urlencode(params, quote_via=quote))
def google_translate(text):
js = Py4Js()
header = {
'authority':'translate.google.cn',
'method':'GET',
'path':'',
'scheme':'https',
'accept':'*/*',
'accept-encoding':'gzip, deflate, br',
'accept-language':'zh-CN,zh;q=0.9',
'cookie':'',
'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36',
'x-client-data':'CIa2yQEIpbbJAQjBtskBCPqcygEIqZ3KAQioo8oBGJGjygE='
}
url = buildUrl(text, js.getTk(text))
res = ''
try:
r = requests.get(url)
result = json.loads(r.text)
if result[7] != None:
# 如果我们文本输错,提示你是不是要找xxx的话,那么重新把xxx正确的翻译之后返回
# 谷歌返回的结果是一个json格式的数据,我们将其变成一个嵌套的list,可以发现该list长度为9,第零个元素就是翻译结果,第七个结果是一些提示信息。
try:
correctText=result[7][0].replace('<b><i>', ' ').replace('</i></b>', '')
print(correctText)
correctUrl=buildUrl(correctText,js.getTk(correctText))
correctR=requests.get(correctUrl)
newResult=json.loads(correctR.text)
res=newResult[0][0][0]
except Exception as e:
print(e)
res = result[0][0][0]
else:
res = result[0][0][0]
except Exception as e:
res = ''
print(url)
print("翻译"+text+"失败")
print("错误信息:")
print(e)
finally:
return res
def baidu_translate(text, from_lang='auto', to_lang='zh', appid = 'your appid', secretKey = 'your secretKey'):
    """Translate *text* with the Baidu Translate HTTP API.

    Args:
        text: The text to translate.
        from_lang: Source language code; ``'auto'`` lets the service detect it.
        to_lang: Target language code.
        appid: Application id issued by Baidu Translate.
        secretKey: Secret key paired with *appid*; used only to sign the request.

    Returns:
        The translated text. When the service returns several ``trans_result``
        entries they are joined with newlines. Returns ``''`` when *text* is
        empty or the request fails (the error is printed).
    """
    if not text:
        return ''

    url = 'https://api.fanyi.baidu.com/api/trans/vip/translate'
    salt = str(random.randint(32768, 65536))
    # The signature is the MD5 of appid + text + salt + secretKey, computed over
    # the raw text before any URL-encoding.
    sign = hashlib.md5((appid + text + salt + secretKey).encode("utf8")).hexdigest()
    params = {
        'appid': appid,
        'q': text,
        'from': from_lang,
        'to': to_lang,
        'salt': salt,
        'sign': sign,
    }
    try:
        # requests URL-encodes the parameters itself.
        response = requests.get(url, params=params, timeout=10)
        # SECURITY: the body used to be passed to eval(). It comes from the
        # network and must never be executed as Python, so it is parsed as JSON.
        payload = json.loads(response.text)
        if 'trans_result' not in payload:
            # Report the service's own error fields instead of a bare KeyError.
            raise ValueError('Baidu translate error {}: {}'.format(
                payload.get('error_code'), payload.get('error_msg')))
        return '\n'.join(item['dst'] for item in payload['trans_result'])
    except Exception as e:
        print(e)
        return ''
if __name__ == '__main__':
    # Demo run: translate one sample phrase with Google and show the result.
    translated = google_translate('oh shit')
    print(translated)
参考: