# 处理TXT文档中杂乱无章的句子,并根据标题等级写入Word文档

import re
from Baidu_Text_transAPI_2 import baidu_api_fanyi
from docx.shared import Cm, RGBColor
import docx
from docx.shared import Pt
from docx.oxml.ns import qn
# Ordered clean-up rules for the raw text read from 2.txt.
# Each entry is (regex, replacement template, re flags). The rules are applied
# strictly top to bottom; later rules rely on the output of earlier ones, so
# the order must not be changed.
_CLEANUP_RULES = (
    # Level-1 numbering: drop the line break right after "<digit>.".
    (r'(\d\.)\n', r'\1', 0),
    # Level-2 numbering: drop whitespace after "<newline><capital letter>.".
    (r'(\n[A-Z]\.)\s*', r'\1', 0),
    # Level-3 numbering: drop whitespace after "(<digit>)".
    (r'\((\d)\)\s*', r'(\1)', 0),
    # Level-4 numbering: drop whitespace after "(<lowercase letter>)".
    (r'(\([a-z]\))\s*', r'\1', 0),
    # Level-5 numbering: a digit at the start of a line, followed by any
    # whitespace, becomes the digit plus exactly one space.
    (r'\n(\d)\s*', r'\n\1 ', 0),

    # Remove single spaces around a full stop: both sides, then before, then after.
    (r' (\.) ', r'\1', 0),
    (r' (\.)', r'\1', 0),
    (r'(\.) ', r'\1', 0),

    # A line that starts with a lowercase letter is joined to the previous line.
    (r'\n([a-z].*)', r'\1', 0),
    # Whitespace (including line breaks) in front of punctuation is removed.
    (r'\s*([!.,?;:=].*)', r'\1', 0),
    # Whitespace (including line breaks) after "," or "=" is removed.
    (r'([,=])\s*', r'\1', 0),
    # A single space between two runs of digits is removed.
    (r'(\d+) (\d+)', r'\1\2', 0),
    # Whitespace (including line breaks) after "(" is removed.
    (r'(\()\s*', r'\1', 0),

    # Start a new line between ":" and a following "(<digit>)" marker.
    (r":\((\d)\)", r":\n(\1)", 0),
    # Start a new line between ":" and a following "(<lowercase letter>)" marker.
    (r":\(([a-z])\)", r":\n(\1)", 0),
    # Start a new line after ":" when a capital letter follows.
    # NOTE(review): the "." is unescaped, so it matches any character, not
    # only a literal dot - confirm this is intended.
    (r":([A-Z].)", r":\n\1", 0),
    # Start a new line after ":" when a digit follows (same unescaped "." caveat).
    (r":(\d.)", r":\n\1", 0),
    # ":<lowercase letter><newline>" -> move the letter to the next line,
    # followed by a space.
    (r":([a-z])\n", r":\n\1 ", 0),

    # Break the line between "<digit>." and "(<digit>)".
    (r"(\d)(\.)(\()(\d)(\))", r"\1\2\n\3\4\5", 0),
    # Drop whitespace after "(<digit>).<capital letter>.".
    (r"(\(\d\)\.[A-Z]\.)(\s*)", r"\1", 0),
    # Drop the line break after "<capital>.<capital>.".
    (r'([A-Z]\.[A-Z]\.)\n', r'\1', 0),
    # Break the line between "." and "(".
    (r"(\.)(\()", r"\1\n\2", 0),
    # Re-join "in" + any one character at a line end with a following "(<digit>".
    (r"(in.)\n(\(\d)", r"\1\2", 0),
    # Re-join ":" with a following line that starts like a number.
    (r":(\n)(\d.\d+)", r":\2", 0),
    # Re-join ":" with a following line that starts with a digit and contains "'".
    (r":(\n)(\d.*')", r":\2", 0),

    # A line must not end on these words: replace the break with a space
    # (case-insensitive).
    (r' (on)\n', r" \1 ", re.IGNORECASE),
    (r' (to)\n', r" \1 ", re.IGNORECASE),
    (r' (and)\n', r" \1 ", re.IGNORECASE),
    (r' (with)\n', r" \1 ", re.IGNORECASE),
    (r' (at)\n', r" \1 ", re.IGNORECASE),
    # "p/n" at a line end followed by a digit on the next line is re-joined.
    (r' (p/n)\n(\d)', r" \1 \2", re.IGNORECASE),
)

# Read the raw text; bytes that are not valid GBK are silently dropped.
with open('2.txt', 'r', encoding='gbk', errors='ignore') as source_file:
    text = source_file.read()

# Discard empty / whitespace-only lines before any rule runs.
text = '\n'.join(row for row in text.split('\n') if row.strip())

# Run every clean-up rule in its declared order.
for rule_pattern, rule_replacement, rule_flags in _CLEANUP_RULES:
    text = re.sub(rule_pattern, rule_replacement, text, flags=rule_flags)

print("处理结束!!!")

# Write the cleaned text; the file is created if it does not exist yet.
with open('output.txt', 'w', encoding='utf-8') as target_file:
    target_file.write(text)

def check_text_pattern(text):
    """Classify a line by the numbering marker it starts with.

    The candidate prefixes are tried in order and the label of the first
    one that matches at the very start of ``text`` is returned:

    * "情况1" - one digit followed by "."
    * "情况2" - one capital letter followed by "."
    * "情况3" - one digit in parentheses
    * "情况4" - one lowercase letter in parentheses
    * "情况5" - one digit followed by a space
    * "情况6" - none of the above
    """
    prefix_labels = (
        (r'^\d\.', "情况1"),
        (r'^[A-Z]\.', "情况2"),
        (r'^\((\d)\)', "情况3"),
        (r'^\(([a-z])\)', "情况4"),
        (r'^\d ', "情况5"),
    )
    matching_labels = (
        label for prefix, label in prefix_labels if re.match(prefix, text)
    )
    # Fall back to the plain-text label when no prefix matched.
    return next(matching_labels, "情况6")

# Maps the label returned by check_text_pattern() to
# (heading level, regex of the numbering prefix, force italic off,
#  keep the source heading on the same page as the next paragraph).
# Every prefix is anchored with "^" so that only the leading marker is
# stripped before translation; an unanchored pattern would also delete
# "digit + dot" sequences (e.g. "2.5") in the middle of a heading.
_HEADING_RULES = {
    '情况1': (1, r'^\d\.', False, True),
    '情况2': (2, r'^[A-Z]\.', False, True),
    '情况3': (3, r'^\((\d)\)', False, True),
    '情况4': (4, r'^\(([a-z])\)', True, True),
    '情况5': (5, r'^\d ', True, False),
}


def _add_formatted_block(doc, text, *, level=None, first_line_indent=0,
                         keep_together=False, keep_with_next=False,
                         bold=False, clear_italic=False):
    """Append one uniformly formatted paragraph or heading to ``doc``.

    Args:
        doc: the document object returned by ``docx.Document()``.
        text: text placed in the single run of the new paragraph.
        level: heading level passed to ``add_heading``; ``None`` adds a
            plain paragraph via ``add_paragraph`` instead.
        first_line_indent: first-line indent in centimetres.
        keep_together: value for ``paragraph_format.keep_together``.
        keep_with_next: value for ``paragraph_format.keep_with_next``.
        bold: value for the run's ``font.bold``.
        clear_italic: when true, ``font.italic`` is explicitly set to
            ``False``; otherwise italic is left untouched.

    Returns:
        The paragraph object that was added.
    """
    if level is None:
        block = doc.add_paragraph()
    else:
        block = doc.add_heading(level=level)

    layout = block.paragraph_format
    layout.left_indent = Cm(0)
    layout.right_indent = Cm(0)
    layout.first_line_indent = Cm(first_line_indent)
    # A float line_spacing is a multiple of the line height; a length value
    # would be an absolute distance.
    layout.line_spacing = 1.0
    layout.space_before = Pt(0)
    layout.space_after = Pt(0)
    # Pagination behaviour.
    layout.keep_together = keep_together
    layout.keep_with_next = keep_with_next
    layout.page_break_before = False
    layout.widow_control = True

    run = block.add_run(text)
    run.font.size = Pt(10)
    run.font.name = 'Times New Roman'
    # East-Asian (Chinese) font is set on the run's rFonts element.
    run.font.element.rPr.rFonts.set(qn('w:eastAsia'), "宋体")
    run.font.bold = bold
    run.font.color.rgb = RGBColor(0, 0, 0)
    if clear_italic:
        run.font.italic = False
    return block


# Build example.docx from the cleaned text: every line is written once as-is
# and once as the result of baidu_api_fanyi(), with the numbering prefix
# removed from headings before the call.
with open('output.txt', 'r', encoding='utf-8', errors='ignore') as file:
    lines = file.readlines()  # one list entry per line of output.txt
    doc = docx.Document()
    for line in lines:
        line = line.replace('\n', '')
        # Classify once per line instead of once per branch.
        rule = _HEADING_RULES.get(check_text_pattern(line))

        if rule is None:
            # Body text: plain paragraph for the source line, then a plain
            # paragraph for the translation (kept on one page). Both runs use
            # the same explicit font settings.
            _add_formatted_block(doc, line)
            _add_formatted_block(doc, baidu_api_fanyi(line),
                                 keep_together=True)
            continue

        level, prefix, clear_italic, keep_with_next = rule
        # Strip only the leading numbering marker before translating.
        new_text = re.sub(prefix, '', line, count=1)

        # Source heading, including its numbering.
        _add_formatted_block(doc, line, level=level,
                             keep_together=True,
                             keep_with_next=keep_with_next,
                             bold=True, clear_italic=clear_italic)
        # Translated heading, indented by 0.38 cm on its first line.
        _add_formatted_block(doc, baidu_api_fanyi(new_text), level=level,
                             first_line_indent=0.38,
                             bold=True, clear_italic=clear_italic)
    # Save the document.
    doc.save('example.docx')


  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
WPS文字转Word文档格式乱七八糟可能是由于以下几个原因导致的: 1. 版本兼容性问题:WPS和Word是两款不同的办公软件,它们使用的文件格式不完全相同。当你使用WPS文字转换为Word文档时,某些格式可能无法完全转换或被转换成不同的格式,导致文档格式混乱。 2. 复杂格式处理问题:如果原始文档包含复杂的格式,如表格、图表、插图等,WPS文字转换为Word时可能无法完全保留原始格式,并且可能会出现错位、缺失或乱码等问题。 3. 字体兼容性问题:WPS和Word使用的字体库有所不同,如果原始文档使用了某些特殊字体,在转换过程中可能会导致字体替换或无法显示的问题,进而影响整体文档的格式。 为避免格式混乱,你可以尝试以下方法: 1. 使用相同软件:如果可能的话,建议在同一款软件中完成文档的编辑和转换,例如,在WPS中编辑的文档最好在WPS中进行转换。 2. 简化格式:在进行转换前,尽量避免使用过多的复杂格式和特殊字体,尽量使用常见的字体和格式,以增加转换的成功率。 3. 逐步转换:如果你的文档比较复杂,可以将文档分成多个部分进行转换,以便更好地处理格式问题。 4. 检查转换结果:在转换后,务必仔细检查文档的格式,并进行必要的调整和修复,以确保文档的可读性和准确性。 需要注意的是,由于WPS和Word是两个不同的软件,无法保证在转换过程中完全保留原始文档的格式,因此可能会存在格式混乱的情况。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值