Python 自动翻译 PDF:一键翻译脚本

安装第三方库

pip install pdfminer3k

配置:注册百度翻译平台账号,获取 appid 及 key

详细代码

import hashlib

import random

import urllib.parse

import requests

from concurrent import futures

from io import StringIO

from pdfminer.pdfinterp import PDFResourceManager

from pdfminer.pdfinterp import process_pdf

from pdfminer.converter import TextConverter

from pdfminer.layout import LAParams

def read_from_pdf(file_path):
    """Extract the text content of a PDF file.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        Everything the ``TextConverter`` wrote to its output buffer while
        the document was processed, as one string.
    """
    with open(file_path, 'rb') as pdf_file, StringIO() as text_buffer:
        resource_manager = PDFResourceManager()
        device = TextConverter(
            resource_manager, text_buffer, laparams=LAParams())
        try:
            process_pdf(resource_manager, device, pdf_file)
        finally:
            # Release the converter even when parsing fails part-way.
            device.close()
        # Read the buffer before the ``with`` block closes it.
        return text_buffer.getvalue()

def create_sign(q, appid, salt, key):
    """Build the request signature.

    The signature is the hexadecimal MD5 digest of ``appid``, ``q``,
    ``salt`` and ``key`` concatenated in that order (each converted with
    ``str``) and encoded as UTF-8.
    """
    raw = ''.join(str(part) for part in (appid, q, salt, key))
    return hashlib.md5(raw.encode('utf-8')).hexdigest()

def create_url(q, url):
    """Return ``url`` with the translation query string appended.

    The source language is ``auto`` and the target language is ``zh``.
    A random integer salt is drawn for every call and the signature is
    computed with ``create_sign``.  ``appid`` and ``key`` are not
    parameters; they are read from module-level globals.
    """
    salt = random.randint(32768, 65536)
    sign = create_sign(q, appid, salt, key)
    # Only the text itself is percent-encoded; the other values are
    # inserted as their plain ``str`` form.
    params = (
        ('appid', appid),
        ('q', urllib.parse.quote(q)),
        ('from', 'auto'),
        ('to', 'zh'),
        ('salt', salt),
        ('sign', sign),
    )
    query = '&'.join(name + '=' + str(value) for name, value in params)
    return url + '?' + query

def translate(q):
    """Translate one piece of text through the translation HTTP API.

    Args:
        q: Text to translate.

    Returns:
        The translated text.  Blank input is returned unchanged without
        calling the API.  If the request fails, the reply is not JSON, or
        the reply carries no ``trans_result``, the problem is printed and
        a fixed placeholder string is returned instead, so one failing
        paragraph does not abort the others.
    """
    failure_text = '这一部分翻译错误\n'
    error_report = '程序已经出错,请查看报错信息:\n{}'

    # Nothing to translate: skip the network round trip.
    if not q.strip():
        return q

    url = create_url(q, 'http://api.fanyi.baidu.com/api/trans/vip/translate')
    try:
        # Without a timeout a stalled connection would block this worker
        # forever.
        txt = requests.get(url, timeout=30).json()
    except (requests.RequestException, ValueError) as err:
        # ValueError covers a body that is not valid JSON.
        print(error_report.format(err))
        return failure_text

    results = txt.get('trans_result')
    if not results:
        print(error_report.format(txt))
        return failure_text

    # ``trans_result`` is a list; keep every entry, not only the first.
    return '\n'.join(item['dst'] for item in results)

def clean_data(data):
    """Split extracted text into a list of paragraphs.

    Paragraphs are separated by a blank line (two consecutive newline
    characters).  Any single newline left inside a paragraph is replaced
    by a space so that each paragraph is one line of text.

    Args:
        data: The raw text.

    Returns:
        A list of paragraph strings; an empty input yields ``['']``.
    """
    # Split on the separator directly instead of substituting a marker
    # string first: a marker would also split text that happens to
    # contain it.
    return [paragraph.replace('\n', ' ') for paragraph in data.split('\n\n')]

def _main(pdf_path, txt_path, max_workers=20):
    """Translate a PDF and save the result as a UTF-8 text file.

    The PDF text is split into paragraphs, every paragraph is passed to
    ``translate``, and the results are joined with blank lines, printed,
    and written to ``txt_path``.

    Args:
        pdf_path: Path of the PDF file to translate.
        txt_path: Path of the text file to write.
        max_workers: Number of threads issuing translation requests at
            the same time.  Defaults to 20.
    """
    paragraphs = clean_data(read_from_pdf(pdf_path))

    # ``map`` yields results in the order of the inputs, so paragraph
    # order is preserved even though requests run concurrently.
    with futures.ThreadPoolExecutor(max_workers) as executor:
        translated = list(executor.map(translate, paragraphs))

    article = '\n\n'.join(translated)
    print(article)
    with open(txt_path, 'w', encoding='utf-8') as f:
        f.write(article)

if __name__ == '__main__':
    # API credentials; ``create_url`` reads them as module-level globals.
    appid = None  # fill in your appid (an int)
    key = '***'  # fill in your key (a str)

    # Stop early with a clear message instead of sending requests that
    # carry placeholder credentials.
    if appid is None or key == '***':
        raise SystemExit('请先在脚本中填入 appid 和 key')

    # Path of the PDF to translate and of the txt file to write.
    _main('1.pdf', '1.txt')

标签:return,python,一键,str,import,pdf,txt,data

来源: https://blog.csdn.net/Tiger_lin1/article/details/86680925

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值