python 英语翻译_python实现在线翻译功能

对于需要大量翻译的数据,人工翻译太慢,此时需要使用软件进行批量翻译。

1.使用360的翻译

def fanyi_word_cn(string):
    """Translate text through the 360 translation endpoint (fanyi.so.com).

    Args:
        string: The English text to translate.

    Returns:
        The value stored under ``["data"]["fanyi"]`` in the JSON reply.

    Raises:
        urllib.error.URLError: If the HTTP request fails.
        ValueError: If the reply body is not valid JSON.
        KeyError: If the reply has no ``data`` / ``fanyi`` entry.
    """
    # Imported locally so the snippet runs on its own; the article shows
    # no import lines for these names.
    import json
    from urllib import parse, request

    url = "https://fanyi.so.com/index/search"
    form_data = {
        'query': string,  # the text to translate
        'eng': '1',       # presumably selects English as the source - unverified
    }

    # urlopen() takes the request body as bytes, not as a dict, so the form
    # is URL-encoded and then encoded to UTF-8.
    data = parse.urlencode(form_data).encode('utf-8')

    # Supplying ``data`` turns the request into a POST. The ``with`` block
    # closes the connection even if reading or decoding fails.
    with request.urlopen(url, data) as response:
        html = response.read().decode("utf-8")

    # The reply is JSON; json.loads turns it into Python dicts/lists.
    result = json.loads(html)
    return result["data"]["fanyi"]

2.使用Google自带的API来翻译

注意:使用前需要先安装对应的API模块。

pip install translate

#google api, per 1000 words everyday

def translate_cn_api(content):
    """Translate ``content`` to Chinese using ``Translator(to_lang="zh")``.

    Returns whatever ``Translator.translate`` returns for ``content``.
    """
    return Translator(to_lang="zh").translate(content)

3.直接调用Google翻译的网页接口来翻译。之所以这样做,是由于Google提供的API有用量限制,每天只能翻译1000个单词。

备注:环境准备

3.1 JavaScript运行环境(PyExecJS执行JS代码时需要,例如Node.js;已安装Java时也可使用其自带的Nashorn)

3.2 安装execjs模块

pip install PyExecJS

3.3 两个实现模块

HandleJs.py

#coding=utf-8

import execjs

class Py4Js():
    """Compute the ``tk`` request token by running embedded JavaScript.

    The JavaScript source defines two functions:

    * ``TL(a)`` - the entry point. It converts the characters of ``a`` into
      UTF-8 byte values, folds every byte into an accumulator seeded with
      ``b = 406644`` (mixing with ``RL(a, "+-a^+6")`` after each byte),
      applies a final ``RL(a, "+-3^+b+-f")``, XORs with ``b1``, forces the
      value non-negative, reduces it modulo 1E6 and returns the string
      ``"<a>.<a ^ b>"``.
    * ``RL(a, b)`` - the mixing helper. It reads ``b`` three characters at a
      time: the third character is the shift amount (letters from ``"a"``
      upward map to ``charCode - 87``, so ``"a"`` is 10), the second picks
      an unsigned right shift (``"+"``) or a left shift, and the first picks
      a 32-bit masked addition (``"+"``) or an XOR.

    The JavaScript text is a runtime string and must not be reformatted.
    """

    def __init__(self):
        """Compile the JavaScript source into an execjs context (``self.ctx``)."""
        self.ctx = execjs.compile("""

function TL(a) {

var k = "";

var b = 406644;

var b1 = 3293161072;

var jd = ".";

var $b = "+-a^+6";

var Zb = "+-3^+b+-f";

for (var e = [], f = 0, g = 0; g < a.length; g++) {

var m = a.charCodeAt(g);

128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : (55296 == (m & 64512) && g + 1 < a.length && 56320 == (a.charCodeAt(g + 1) & 64512) ? (m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023),

e[f++] = m >> 18 | 240,

e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224,

e[f++] = m >> 6 & 63 | 128),

e[f++] = m & 63 | 128)

}

a = b;

for (f = 0; f < e.length; f++) a += e[f],

a = RL(a, $b);

a = RL(a, Zb);

a ^= b1 || 0;

0 > a && (a = (a & 2147483647) + 2147483648);

a %= 1E6;

return a.toString() + jd + (a ^ b)

};

function RL(a, b) {

var t = "a";

var Yb = "+";

for (var c = 0; c < b.length - 2; c += 3) {

var d = b.charAt(c + 2),

d = d >= t ? d.charCodeAt(0) - 87 : Number(d),

d = b.charAt(c + 1) == Yb ? a >>> d: a << d;

a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d

}

return a

}

""")

    def getTk(self,text):
        """Return the result of calling the JavaScript ``TL`` function on ``text``."""
        return self.ctx.call("TL",text)

main.py

#coding=utf-8

#import urllib.request

import urllib2

from HandleJs import Py4Js

from translate import Translator

import requests

# Example: find_last('aaaa', 'a') returns 3

# Make sure your procedure has a return statement.

def find_last(string,str):
    """Return the index of the last occurrence of ``str`` in ``string``.

    Args:
        string: The text to search in.
        str: The substring to look for. The name shadows the builtin but is
            kept so existing keyword callers keep working.

    Returns:
        The highest index at which ``str`` starts (overlapping matches
        count), or -1 if it does not occur. For an empty ``str`` the result
        is ``len(string)``.

    Example:
        find_last('aaaa', 'a') returns 3
    """
    # str.rfind gives the same answer as scanning forward with repeated
    # find() calls, in a single pass.
    return string.rfind(str)

def open_url(url):
    """Fetch ``url`` with a browser-like User-Agent and return the decoded body.

    Args:
        url: The address to request.

    Returns:
        The response body decoded as UTF-8.
    """
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
    req = urllib2.Request(url=url, headers=headers)
    response = urllib2.urlopen(req)
    try:
        return response.read().decode('utf-8')
    finally:
        # Release the connection even when reading or decoding fails.
        # try/finally is used because urllib2 responses are not context managers.
        response.close()

def translate_core(content,tk, language):
    """Request a Simplified-Chinese translation of ``content`` and extract the text.

    Args:
        content: Text to translate; must not be longer than 4891 characters.
        tk: Request token computed for ``content``.
        language: Source language. ``'de'`` selects German; any other value
            is sent as English.

    Returns:
        The translated text cut out of the raw reply, or ``None`` when
        ``content`` is too long or the short-reply extraction finds nothing.
    """
    if len(content) > 4891:
        print("too long byte >4891")
        return None

    content = urllib2.quote(content)

    # Only German and English sources are distinguished.
    source_lang = 'de' if language == 'de' else 'en'
    url = ("http://translate.google.cn/translate_a/single?client=t"
           "&sl=%s&tl=zh-CN&hl=zh-CN&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca"
           "&dt=rw&dt=rm&dt=ss&dt=t&ie=UTF-8&oe=UTF-8&clearbtn=1&otf=1&pc=1"
           "&srcrom=0&ssel=0&tsel=0&kc=2&tk=%s&q=%s") % (source_lang, tk, content)

    # The reply is JSON text, but it is sliced as a plain string below.
    result = open_url(url)

    # NOTE: ``content`` was URL-quoted above, so this compares the quoted length.
    if len(content) < 10:
        # Short input: the translation is taken from index 4 up to the
        # first '",' in the reply.
        end = result.find("\",")
        if end > 4:
            return result[4:end]
        return None

    # Longer input: keep everything before the ',null,"<lang>",null,null,'
    # marker, strip the outer brackets and split into per-segment entries.
    head = result.split(',null,"%s",null,null,' % source_lang)[0]
    result_all = head.replace('[[', '').replace(']]', ']')[1:]
    segments = result_all.split('],[')

    # Each entry contributes the text between its first character and the
    # first '",'. The final entry is deliberately left out.
    pieces = []
    for segment in segments[:-1]:
        end = segment.find("\",")
        pieces.append(segment[1:end])
    return ''.join(pieces)

def translate_normal(content, language):
    """Translate one piece of text that fits in a single request.

    Builds a fresh ``Py4Js`` helper, asks it for the ``tk`` token of
    ``content`` and hands both to ``translate_core``, whose result is
    returned unchanged.
    """
    token = Py4Js().getTk(content)
    return translate_core(content, token, language)

def translate_cn(content, language):
    """Translate text of any length to Chinese, chunking input over the size limit.

    Text of at most 4891 characters is passed straight to
    ``translate_normal``. Longer text is cut into chunks of at most 4891
    characters, each translated separately, and the results are joined.

    Args:
        content: The text to translate.
        language: Source language code, forwarded to ``translate_normal``.

    Returns:
        The translated text. For short input this is whatever
        ``translate_normal`` returns (possibly ``None``); for chunked input
        it is always a string.
    """
    LEN_LIMIT = 4891
    print('en:' + content)

    if len(content) <= LEN_LIMIT:
        return translate_normal(content, language)

    translated = []
    while content:
        window = content[:LEN_LIMIT]
        # Cut after the last full stop in the window; failing that after the
        # last space; failing that make a hard cut at the limit.
        cut = find_last(window, '.') + 1
        if cut == 0:
            cut = find_last(window, ' ') + 1
        if cut == 0:
            cut = LEN_LIMIT

        piece = translate_normal(content[:cut], language)
        # translate_normal can return None when nothing could be extracted.
        # Skip such chunks instead of failing on str + None.
        if piece:
            translated.append(piece)
        content = content[cut:]
    return ''.join(translated)

#google api, per 1000 words everyday

def translate_cn_api(content):
    """Translate ``content`` to Chinese using ``Translator(to_lang="zh")``.

    Returns whatever ``Translator.translate`` returns for ``content``.
    """
    return Translator(to_lang="zh").translate(content)

if __name__ == "__main__":
    # Sample inputs keyed by source language; change ``language`` to switch.
    samples = {
        'en': """Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Namespaces are one honking great idea -- let's do more of those!""",
        'de': """
IT-Grundschutz M5.131: Absicherung von IP-Protokollen unter Windows Server 2003.""",
    }

    language = 'en'
    content = samples[language]

    # Line breaks are removed before the text is sent for translation.
    test = translate_cn(content.replace('\n', ''), language)

    # translate_cn can return None for short input; avoid str + None.
    if test is None:
        print('ok: (no translation returned)')
    else:
        print('ok:' + test)

此处实现了德语翻译成中文和英文翻译成中文。

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持脚本之家。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值