Aliyun——机器翻译——文献翻译

最新推荐文章于 2024-07-17 21:04:45 发布

IsYuh

最新推荐文章于 2024-07-17 21:04:45 发布

阅读量234

点赞数

分类专栏： Python 文章标签：机器翻译 python 阿里云

本文链接：https://blog.csdn.net/a1150162163/article/details/130068454

版权

Python 专栏收录该内容

15 篇文章 0 订阅

订阅专栏

# -*- coding: utf-8 -*-
'''
通过豆瓣源安装【alibabacloud_alimt20181012.client】和【alibabacloud_tea_openapi】
pip install alibabacloud-alimt20181012 -i https://pypi.douban.com/simple/
pip install alibabacloud_tea_openapi -i https://pypi.douban.com/simple/
pip install python_docx
'''
'''
每段文本最长五千个字符！！！如超过五千个字符，请自行分段
'''
import docx
from docx.shared import Pt
from docx.oxml.ns import qn
import os
import re
from alibabacloud_alimt20181012.client import Client as alimt20181012Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_alimt20181012 import models as alimt_20181012_models
from alibabacloud_tea_util import models as util_models

# AccessKey ID / secret for the Alibaba Cloud general translation API.
# They are read from the environment so the secrets do not have to be written
# into the script; the empty-string fallback keeps the previous default, and
# the fallback may still be replaced with your own key for quick local use.
ACCESS_KEY_ID = os.environ.get("ALIBABA_CLOUD_ACCESS_KEY_ID", "")
ACCESS_KEY_SECRET = os.environ.get("ALIBABA_CLOUD_ACCESS_KEY_SECRET", "")

def create_client(
    access_key_id: str,
    access_key_secret: str,
) -> alimt20181012Client:
    """Build a machine-translation client for the given credentials.

    Args:
        access_key_id: AccessKey ID placed into the client configuration.
        access_key_secret: AccessKey secret placed into the client
            configuration.

    Returns:
        A client whose configuration points at the
        ``mt.cn-hangzhou.aliyuncs.com`` endpoint.
    """
    settings = open_api_models.Config(
        access_key_id=access_key_id,
        access_key_secret=access_key_secret,
    )
    # The endpoint is assigned on the config object after construction.
    settings.endpoint = 'mt.cn-hangzhou.aliyuncs.com'
    client = alimt20181012Client(settings)
    return client
def translate(text):
    """Translate ``text`` from English to Chinese and append it to ``doc``.

    A client is created from the module-level credentials, ``text`` is sent
    as a 'general' scene plain-text request, and the translated string is
    added as a new paragraph of the module-level ``doc`` with a first-line
    indent of ``Pt(14) * 2``.

    Args:
        text: Source text to translate.

    Returns:
        None. The result is written into ``doc`` rather than returned.
    """
    mt_client = create_client(ACCESS_KEY_ID, ACCESS_KEY_SECRET)
    request = alimt_20181012_models.TranslateGeneralRequest(
        format_type='text',
        source_language='en',
        target_language='zh',
        source_text=text,
        scene='general',
    )
    options = util_models.RuntimeOptions()
    response = mt_client.translate_general_with_options(request, options)
    # The translated string is read from the response data's attribute dict.
    translated_text = vars(response.body.data)['translated']
    paragraph = doc.add_paragraph(translated_text)
    paragraph.paragraph_format.first_line_indent = Pt(14) * 2
#print(translate("Rome is not built in a day."))

# Merge the hard-wrapped lines of each paragraph into a single line.
def txtchange():
    """Join the wrapped lines of each paragraph in ``path_A`` into ``path_B``.

    Reads the module-level ``path_A`` as UTF-8. Every line is stripped; a
    line that is empty after stripping ends the current paragraph. Each
    paragraph is written to the module-level ``path_B`` as one line in which
    every piece is preceded by a single space (so the line starts with a
    space, matching the existing output format). A blank input line with no
    pending text yields an empty output line, and one final line is always
    written for whatever text remains at end of file.

    Both files are closed on return, even if reading or writing fails.
    """
    with open(path_A, encoding='UTF-8') as src, \
            open(path_B, mode='w', encoding='UTF-8') as dst:
        pieces = []
        for raw_line in src:
            stripped = raw_line.strip()
            if stripped:
                pieces.append(stripped)
            else:
                # Blank line: flush the paragraph collected so far.
                dst.write(''.join(' ' + piece for piece in pieces) + '\n')
                pieces = []
        # Flush the trailing paragraph (or an empty line if there is none).
        dst.write(''.join(' ' + piece for piece in pieces) + '\n')


def file_exist(dirpath):
    """Ensure that the directory ``dirpath`` exists, creating it if needed.

    Missing parent directories are created as well. If anything already
    exists at ``dirpath`` the function does nothing.

    Args:
        dirpath: Path of the directory to create.
    """
    if not os.path.exists(dirpath):
        # makedirs also creates missing parents; exist_ok tolerates the
        # directory appearing between the check above and this call.
        os.makedirs(dirpath, exist_ok=True)



if __name__ == '__main__':
    # Text file holding the copied English text; paragraphs are separated
    # by one blank line.
    path_A = "D:/Desktop/参考文献1.txt"
    # Intermediate file: one merged paragraph per line.
    path_B = "D:/Desktop/参考文献2.txt"
    # Resulting Word document.
    path_C = "D:/Desktop/result.docx"

    # Only the *directories* of the output files have to exist. Passing the
    # file paths themselves to file_exist() would create directories with
    # those names and make the later open()/save() calls fail.
    for out_path in (path_B, path_C):
        out_dir = os.path.dirname(out_path)
        if out_dir:
            file_exist(out_dir)

    txtchange()

    # `doc` is deliberately module-level: translate() appends to it.
    doc = docx.Document()
    doc.styles['Normal'].font.size = Pt(14)
    doc.styles['Normal'].font.name = u'Times New Roman'
    doc.styles['Normal'].element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')

    with open(path_B, encoding='UTF-8') as merged:
        for line in merged:
            # Skip empty paragraphs instead of sending them for translation.
            if not line.strip():
                continue
            translate(line)
    doc.save(path_C)
    print("翻译完成！")