import json
import re
import docx
import requests
import time
def trans(str, timeout=10):
    """Translate Chinese text to English through Baidu's ``basetrans`` endpoint.

    The text is sent as the ``query`` field of a form-encoded POST request
    (``from=zh``, ``to=en``) with a mobile-browser User-Agent. The JSON reply
    is expected to carry a ``trans`` list whose entries each have a ``dst``
    string; those strings are joined with newlines.

    Args:
        str: Text to translate. The name shadows the builtin; it is kept
            unchanged so existing callers are not broken.
        timeout: Seconds to wait for the service before giving up.

    Returns:
        The translated segments separated by a newline, with no trailing
        newline. An empty string when the ``trans`` list is empty.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the reply lacks ``trans`` or an entry lacks ``dst``.
    """
    # Present the request as coming from a mobile browser.
    url = "http://fanyi.baidu.com/basetrans"
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) "
            "AppleWebKit/604.1.38 (KHTML, like Gecko) "
            "Version/11.0 Mobile/15A372 Safari/604.1"
        )
    }
    data = {"query": str, "from": "zh", "to": "en"}

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(url, data=data, headers=headers, timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing parse error later.
    response.raise_for_status()
    result = json.loads(response.content.decode())

    # Multi-line input comes back as several entries; put one per line.
    # Joining avoids the index() lookup, which was quadratic and added a
    # stray trailing newline when the last entry equalled an earlier one.
    return "\n".join(segment["dst"] for segment in result["trans"])
# Matches at least one character in the range U+4E00..U+9FA5 (Chinese text).
# Compiled once here rather than once per cell.
zh_pattern = re.compile(u'[\u4e00-\u9fa5]+')

# Source texts already translated within the row currently being processed.
translated_in_row = []

doc = docx.Document("222.docx")
# Visit every cell of every row of every table in the document.
for table in doc.tables:
    for row in table.rows:
        # Reset per row: duplicates are only suppressed inside one row, so the
        # same text appearing in another row is still translated.
        translated_in_row.clear()
        for cell in row.cells:
            try:
                original = cell.text
                # Only cells containing Chinese characters are translated.
                if not zh_pattern.search(original):
                    continue
                # A cell already handled in this row now reads
                # "<source>\n<translation>", so it starts with a recorded
                # source text and must not be translated again.
                # NOTE(review): presumably this guards against row.cells
                # yielding the same merged cell more than once - confirm
                # against python-docx behaviour.
                if any(original.startswith(seen) for seen in translated_in_row):
                    continue
                translated_in_row.append(original)
                # Keep the source text and append the translation below it.
                cell.text = original + "\n" + trans(original)
            except Exception as e:
                # Best effort: report the failure and continue with the
                # remaining cells instead of aborting the whole document.
                print(e)
# NOTE(review): no delay is applied between requests; add a short
# time.sleep(...) in the cell loop if the service starts rejecting calls.
doc.save("test_pudate.docx")
# 用 Python 对 Word 文档表格里面的内容进行翻译
# 最新推荐文章于 2023-04-29 20:46:07 发布