人狠不多话,直接上代码
# -*- coding:utf-8 -*-
from datetime import datetime
from wsgiref.handlers import format_date_time
from time import mktime
import hashlib
import base64
import hmac
from urllib.parse import urlencode
import json
import requests
import easygui
import docx
from spire.doc import *
from spire.doc.common import *
from tkinter.messagebox import *
class AssembleHeaderException(Exception):
    """Error raised while assembling request headers (e.g. an invalid request URL).

    Attributes:
        message: The description text handed to the constructor.
    """

    def __init__(self, msg):
        # Pass the text on to Exception explicitly; ``args`` ends up as (msg,).
        super().__init__(msg)
        self.message = msg
class Url:
    """Plain container for the three pieces of a split request URL.

    Attributes:
        host: Host portion of the URL.
        path: Path portion of the URL.
        schema: Scheme portion of the URL.
    """

    def __init__(self, host, path, schema):
        self.host = host
        self.path = path
        self.schema = schema

    def __repr__(self):
        # Readable representation for debugging and log output.
        return "Url(host={!r}, path={!r}, schema={!r})".format(
            self.host, self.path, self.schema)
class WebsocketDemo:
    """Client that submits text to the xf-yun proofreading endpoint.

    Despite the class name, the request is sent as a signed HTTP POST
    (see ``get_result``), not over a websocket connection.
    """

    def __init__(self, APPId, APISecret, APIKey, Text):
        """Store the credentials and the text to be proofread.

        Args:
            APPId: Application id placed in the request header section.
            APISecret: Secret used as the HMAC key when signing the request.
            APIKey: Key embedded in the authorization string.
            Text: The text (str) to send for proofreading.
        """
        self.appid = APPId
        self.apisecret = APISecret
        self.apikey = APIKey
        self.text = Text
        self.url = 'https://cn-huadong-1.xf-yun.com/v1/private/s37b42a45'

    def sha256base64(self, data):
        """Return the Base64 text of the SHA-256 digest of ``data`` (bytes)."""
        digest = hashlib.sha256(data).digest()
        return base64.b64encode(digest).decode(encoding='utf-8')

    def parse_url(self, requset_url):
        """Split ``requset_url`` into schema, host and path.

        Args:
            requset_url: Absolute URL such as ``https://host/path``.

        Returns:
            Url: ``schema`` keeps the trailing ``://``, ``host`` is the text
            between ``://`` and the next ``/``, and ``path`` starts at that
            ``/``.

        Raises:
            AssembleHeaderException: If the URL has no ``://``, an empty
                host, or no path component.
        """
        stidx = requset_url.find("://")
        if stidx < 0:
            raise AssembleHeaderException("invalid request url:" + requset_url)
        schema = requset_url[:stidx + 3]
        remainder = requset_url[stidx + 3:]
        # find() returns -1 when there is no "/", so this guard covers both a
        # missing path (-1) and an empty host (0).
        edidx = remainder.find("/")
        if edidx <= 0:
            raise AssembleHeaderException("invalid request url:" + requset_url)
        return Url(remainder[:edidx], remainder[edidx:], schema)

    def assemble_ws_auth_url(self, requset_url, method="POST", api_key="", api_secret=""):
        """Build the request URL with the signed authorization query string.

        The signature is an HMAC-SHA256 (keyed with ``api_secret``) over the
        host, the current date and the request line; it is wrapped into an
        authorization string, Base64-encoded, and appended to the URL together
        with ``host`` and ``date``.

        Args:
            requset_url: Endpoint URL to sign.
            method: HTTP method used in the signed request line.
            api_key: Key written into the authorization string.
            api_secret: Secret used as the HMAC key.

        Returns:
            str: ``requset_url`` followed by ``?`` and the url-encoded
            ``host``, ``date`` and ``authorization`` parameters.
        """
        u = self.parse_url(requset_url)
        host = u.host
        path = u.path
        # HTTP-date string in GMT, e.g. "Thu, 12 Dec 2019 01:57:27 GMT".
        date = format_date_time(mktime(datetime.now().timetuple()))
        signature_origin = "host: {}\ndate: {}\n{} {} HTTP/1.1".format(host, date, method, path)
        signature_sha = hmac.new(api_secret.encode('utf-8'), signature_origin.encode('utf-8'),
                                 digestmod=hashlib.sha256).digest()
        signature_sha = base64.b64encode(signature_sha).decode(encoding='utf-8')
        authorization_origin = "api_key=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"" % (
            api_key, "hmac-sha256", "host date request-line", signature_sha)
        authorization = base64.b64encode(authorization_origin.encode('utf-8')).decode(encoding='utf-8')
        values = {
            "host": host,
            "date": date,
            "authorization": authorization
        }
        return requset_url + "?" + urlencode(values)

    def get_body(self):
        """Return the JSON-serialisable request body.

        ``self.text`` is UTF-8 encoded and then Base64 encoded into
        ``payload.text.text``; the remaining fields are fixed values.
        """
        body = {
            "header": {
                "app_id": self.appid,
                "status": 3,
            },
            "parameter": {
                "midu_correct": {
                    "output_result": {
                        "encoding": "utf8",
                        "compress": "raw",
                        "format": "json"
                    }
                }
            },
            "payload": {
                "text": {
                    "encoding": "utf8",
                    "compress": "raw",
                    "format": "plain",
                    "status": 3,
                    "text": base64.b64encode(self.text.encode("utf-8")).decode('utf-8')
                }
            }
        }
        return body

    def get_result(self, timeout=30):
        """POST the text to the service and return the decoded result.

        Args:
            timeout: Seconds passed to ``requests.post`` so a stalled
                connection cannot block the caller forever.

        Returns:
            The object obtained by JSON-decoding the Base64 text found at
            ``payload.output_result.text`` in the reply.

        Raises:
            requests.exceptions.RequestException: On connection problems,
                timeouts, or a non-success HTTP status.
            KeyError: If the reply does not contain the expected payload.
        """
        request_url = self.assemble_ws_auth_url(self.url, "POST", self.apikey, self.apisecret)
        # NOTE(review): this Host header differs from the host inside self.url
        # (which is the one that gets signed) - confirm this is intended.
        headers = {'content-type': "application/json", 'host': 'api.xf-yun.com', 'app_id': self.appid}
        response = requests.post(request_url, data=json.dumps(self.get_body()),
                                 headers=headers, timeout=timeout)
        # Surface HTTP-level failures directly instead of an opaque KeyError
        # when the reply body is parsed below.
        response.raise_for_status()
        reply = json.loads(response.content.decode())
        encoded_text = reply['payload']['output_result']['text']
        return json.loads(base64.b64decode(encoded_text).decode())
def out_result(input_text, app_id="********", api_secret="abcdefghijklmn",
               api_key="abcdefg123456789"):
    """Send one piece of text to the proofreading service and return its result.

    Args:
        input_text: The text to be checked.
        app_id: Application id; the default is a placeholder that must be
            replaced with the value obtained from the service console.
        api_secret: API secret; placeholder default, obtain from the console.
        api_key: API key; placeholder default, obtain from the console.

    Returns:
        Whatever ``WebsocketDemo.get_result`` returns for this text
        (presumably a dict holding the proofreading result).
    """
    # A fresh client is built per call because the text is bound at
    # construction time.
    client = WebsocketDemo(app_id, api_secret, api_key, input_text)
    return client.get_result()
def get_text(input_file):
    """Read a Word document and return its non-blank paragraphs.

    Each returned entry is later sent to the proofreading service on its own,
    so the length of the list is the number of service calls that follow.

    Args:
        input_file: The document location handed to ``docx.Document``.

    Returns:
        list[str]: One entry per paragraph that contains visible text, in
        document order, with every ASCII space removed.
    """
    document = docx.Document(input_file)
    print("正在纠错,请等待......")
    list_wb = []
    for paragraph in document.paragraphs:
        text_strip = paragraph.text.replace(" ", "")
        # Skip paragraphs that hold nothing but whitespace (tabs, full-width
        # spaces, ...) so they do not cost a service call.
        if text_strip.strip():
            list_wb.append(text_strip)
    return list_wb
def result_cleaning(intput_result_list):
    """Flatten proofreading results into two parallel lists.

    Args:
        intput_result_list: Iterable of result dicts. Each is read as
            ``result['data']['checklist']``, a list of items providing
            ``'suggest'`` (list of str), ``'type'['name']``, ``'word'`` and
            ``'context'``.

    Returns:
        tuple[list, list]: ``(contexts, notes)`` where ``contexts[i]`` is the
        ``'context'`` of the i-th checklist item and ``notes[i]`` is the
        formatted description (error type, wrong word, suggestion) for it.
        Items appear in the order they occur in the input.
    """
    original_text_list = []
    proofreading_results_list = []
    for the_result in intput_result_list:
        # Iterate the items directly; the former ``index - 1`` lookup emitted
        # the last item of every checklist first.
        for item in the_result['data']['checklist']:
            suggest_list = item['suggest']
            suggest_word = "或".join(suggest_list) if suggest_list else "无"
            proofreading_results = "错误类型:{a}\n错误词:{b}\n建议:“{c}”".format(
                a=item['type']['name'],
                b=item['word'],
                c=suggest_word)
            original_text_list.append(item['context'])
            proofreading_results_list.append(proofreading_results)
    return original_text_list, proofreading_results_list
def get_file():
    """Ask the user to choose a file and return its path.

    The dialog opens in the ``Desktop`` folder under the user's home
    directory.

    Returns:
        str: Path of the chosen file.

    Raises:
        FileNotFoundError: If the dialog is closed without choosing a file.
    """
    start_dir = os.path.join(os.path.expanduser("~"), 'Desktop\\')
    chosen = easygui.fileopenbox(default=start_dir)
    if chosen is None:
        # Without this guard the literal text "None" was returned as a path
        # and only failed later when the document was opened.
        raise FileNotFoundError("no file was selected")
    return r"{}".format(chosen)
def add_result(word_list, the_result_list, word_list_len, into_file):
    """Attach proofreading notes to a Word document as comments and save a copy.

    For every index in ``word_list_len`` the matching text from ``word_list``
    is searched in the document; when found, a comment holding the matching
    entry of ``the_result_list`` is anchored to it. The annotated document is
    written to the user's Desktop folder under the name
    ``添加批注--<original file name>``.

    Args:
        word_list: Texts to look up in the document.
        the_result_list: Comment texts, parallel to ``word_list``.
        word_list_len: Iterable of integer indices to process.
        into_file: Path of the Word document to annotate.
    """
    doc = Document()
    try:
        doc.LoadFromFile(into_file)
        for i in word_list_len:
            try:
                # NOTE: the ``i - 1`` offset is kept for compatibility with
                # existing callers; with a 0-based range it still visits every
                # entry because index -1 wraps to the last element.
                target_text = word_list[i - 1]
                comment_text = the_result_list[i - 1]
                # Locate the text the comment should be attached to.
                selection = doc.FindString(target_text, True, True)
                if selection is None:
                    # Text not present in the document: nothing to annotate.
                    continue
                # Build the comment with its content and author.
                comment = Comment(doc)
                comment.Body.AddParagraph().Text = comment_text
                comment.Format.Author = "讯飞公文校对"
                # Treat the match as a single range and get its paragraph.
                text_range = selection.GetAsOneRange()
                paragraph = text_range.OwnerParagraph
                # Insert the comment right after the matched range.
                paragraph.ChildObjects.Insert(paragraph.ChildObjects.IndexOf(text_range) + 1, comment)
                # Create start/end marks bound to this comment's id.
                commentStart = CommentMark(doc, CommentMarkType.CommentStart)
                commentEnd = CommentMark(doc, CommentMarkType.CommentEnd)
                commentStart.CommentId = comment.Format.CommentId
                commentEnd.CommentId = comment.Format.CommentId
                # Surround the matched range with the start and end marks.
                paragraph.ChildObjects.Insert(paragraph.ChildObjects.IndexOf(text_range), commentStart)
                paragraph.ChildObjects.Insert(paragraph.ChildObjects.IndexOf(text_range) + 1, commentEnd)
            except Exception as err:
                # Best effort: one failing entry must not abort the others,
                # but report it instead of hiding it. KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                print("add_result: could not annotate entry {}: {!r}".format(i, err))
        # Save the annotated copy.
        doc.SaveToFile(r"{a}添加批注--{b}".format(
            a=os.path.join(os.path.expanduser("~"), 'Desktop\\'),
            b=os.path.basename(into_file)))
    finally:
        # Release the document even when loading or saving fails.
        doc.Close()
def running_correct():
    """Run the whole workflow: pick a file, proofread it, write the comments.

    Steps: show a prompt, let the user choose a document, send each extracted
    paragraph to the proofreading service, flatten the results, and add them
    to the document as comments.
    """
    showinfo(title="提示", message="请选择要修改的文件!")
    chosen_path = get_file()
    paragraphs = get_text(chosen_path)
    # One service call per paragraph, results kept in paragraph order.
    raw_results = [out_result(paragraph) for paragraph in paragraphs]
    contexts, notes = result_cleaning(raw_results)
    add_result(contexts, notes, range(len(contexts)), chosen_path)
# Script entry point: start the interactive workflow only when run directly.
if __name__ == "__main__":
    running_correct()