一、API获取
自行注册
https://www.xfyun.cn/service/niutrans
二、用到的库
python_docx
requests
提示什么安装什么就好了
三、使用说明
3.1 主要为了解决英文文献复制出来不是连续的,而是和PDF中一样 一行一段的问题,会导致翻译出现一点偏差
3.2复制英文文献到txt中,每段之间用空行隔开即可
3.3 代码会自动整理,把每段合一起(会生成一个中间文件,路径见代码,请自行更改)
3.4 会在结果路径生成结果word文档
会将英文和翻译的结果输出
4 代码
默认英译中,有其他语言需求请自行更改
API有五千字的限制,故采用分段翻译,如果一段超过五千字,请自行再分。
import docx
from docx.shared import Pt
from docx.oxml.ns import qn
import os
import requests
import datetime
import hashlib
import base64
import hmac
import json
class get_result(object):
def __init__(self,host):
# 应用ID(到控制台获取)
self.APPID = ""
# 接口APISercet(到控制台机器翻译服务页面获取)
self.Secret = ""
# 接口APIKey(到控制台机器翻译服务页面获取)
self.APIKey= ""
# 以下为POST请求
self.Host = host
self.RequestUri = "/v2/ots"
# 设置url
# print(host)
self.url="https://"+host+self.RequestUri
self.HttpMethod = "POST"
self.Algorithm = "hmac-sha256"
self.HttpProto = "HTTP/1.1"
# 设置当前时间
curTime_utc = datetime.datetime.utcnow()
self.Date = self.httpdate(curTime_utc)
# 设置业务参数
# 语种列表参数值请参照接口文档:https://www.xfyun.cn/doc/nlp/niutrans/API.html
if line !="\n":
self.Text=line
self.BusinessArgs={
"from": "en",
"to": "cn",
}
#语言在这里改
def hashlib_256(self, res):
m = hashlib.sha256(bytes(res.encode(encoding='utf-8'))).digest()
result = "SHA-256=" + base64.b64encode(m).decode(encoding='utf-8')
return result
def httpdate(self, dt):
"""
Return a string representation of a date according to RFC 1123
(HTTP/1.1).
The supplied date must be in UTC.
"""
weekday = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"][dt.weekday()]
month = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep",
"Oct", "Nov", "Dec"][dt.month - 1]
return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (weekday, dt.day, month,
dt.year, dt.hour, dt.minute, dt.second)
def generateSignature(self, digest):
signatureStr = "host: " + self.Host + "\n"
signatureStr += "date: " + self.Date + "\n"
signatureStr += self.HttpMethod + " " + self.RequestUri \
+ " " + self.HttpProto + "\n"
signatureStr += "digest: " + digest
signature = hmac.new(bytes(self.Secret.encode(encoding='utf-8')),
bytes(signatureStr.encode(encoding='utf-8')),
digestmod=hashlib.sha256).digest()
result = base64.b64encode(signature)
return result.decode(encoding='utf-8')
def init_header(self, data):
digest = self.hashlib_256(data)
#print(digest)
sign = self.generateSignature(digest)
authHeader = 'api_key="%s", algorithm="%s", ' \
'headers="host date request-line digest", ' \
'signature="%s"' \
% (self.APIKey, self.Algorithm, sign)
#print(authHeader)
headers = {
"Content-Type": "application/json",
"Accept": "application/json",
"Method": "POST",
"Host": self.Host,
"Date": self.Date,
"Digest": digest,
"Authorization": authHeader
}
return headers
def get_body(self):
content = str(base64.b64encode(self.Text.encode('utf-8')), 'utf-8')
postdata = {
"common": {"app_id": self.APPID},
"business": self.BusinessArgs,
"data": {
"text": content,
}
}
body = json.dumps(postdata)
#print(body)
return body
def call_url(self):
if self.APPID == '' or self.APIKey == '' or self.Secret == '':
print('Appid 或APIKey 或APISecret 为空!请打开demo代码,填写相关信息。')
else:
code = 0
body=self.get_body()
headers=self.init_header(body)
#print(self.url)
response = requests.post(self.url, data=body, headers=headers,timeout=8)
status_code = response.status_code
#print(response.content)
if status_code!=200:
# 鉴权失败
print("Http请求失败,状态码:" + str(status_code) + ",错误信息:" + response.text)
print("请根据错误信息检查代码,接口文档:https://www.xfyun.cn/doc/nlp/niutrans/API.html")
else:
# 鉴权成功
respData = json.loads(response.text)
doc.add_paragraph(respData['data']['result']['trans_result']['src']).paragraph_format.first_line_indent = Pt(14) * 2
doc.add_paragraph(respData['data']['result']['trans_result']['dst']).paragraph_format.first_line_indent = Pt(14) * 2 ##返回值是dict字典
#print(respData)
# 以下仅用于调试
code = str(respData["code"])
if code!='0':
print("请前往https://www.xfyun.cn/document/error-code?code=" + code + "查询解决办法")
def txtchange():
file=open(path_A,encoding='UTF-8')
file_write=open(path_B,mode='w',encoding='UTF-8')
linesa = file.readlines()
print(linesa)
pi_string=''
for line in linesa:
line = line.strip()
if len(line):
pi_string = pi_string+' '+line
else:
file_write.write(pi_string+'\n')
pi_string = ''
file_write.write(pi_string+'\n')
def file_exist(dirpath):
if not os.path.exists(dirpath): # os模块判断并创建
os.mkdir(dirpath)
if __name__ == '__main__':
path_A=r"D:\\Desktop\\参考文献1.txt" #复制英文的保存的文件路径
path_B=r"D:\\Desktop\\参考文献2.txt" #中间过程文件
path_C=r"D:\\Desktop\\result.docx" #结果文件
txtchange()
doc=docx.Document()
doc.styles['Normal'].font.size = Pt(14)
doc.styles['Normal'].font.name = u'Times New Roman'
doc.styles['Normal'].element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')
##示例: host="ntrans.xfyun.cn"域名形式
host = "ntrans.xfyun.cn"
#初始化类
file=open(path_B,encoding='UTF-8')
file_write=open(path_C,mode='w',encoding='UTF-8')
linesb = file.readlines()
for line in linesb:
if line =='\n':
next
else:
gClass=get_result(host)
gClass.call_url()
doc.save(path_C)
print("翻译完成!")