Word文档合并工具保留样式与分页_python-docx docx文件合并,插入指定位置,要求保留源格式-CSDN博客

本文链接：https://blog.csdn.net/Leon_Jinhai_Sun/article/details/147414508

要优化合并Word文档的代码，使其尽量保留原始样式和分页，需要更细致地复制文档内容：先逐节复制页面设置，再逐个复制正文中的段落、表格及其文字格式。需要注意，下面的代码只处理正文里的段落和表格，图片、页眉页脚等内容不会被复制。以下是改进后的代码：

from docx import Document
from docx.oxml import parse_xml
from docx.oxml.ns import nsdecls
from docx.shared import Pt
import os

def merge_word_files(file1_path, file2_path, output_path):
    """Merge two Word documents into a new document and save it.

    A fresh, empty document is created and the content of each source file
    is appended to it, in order, by ``copy_content``. No explicit page break
    is inserted between the two sources. A confirmation message is printed
    once the merged file has been written.

    Args:
        file1_path (str): Path of the first Word file.
        file2_path (str): Path of the second Word file.
        output_path (str): Path where the merged document is saved.
    """
    combined = Document()

    # Order matters: the first file's content precedes the second's.
    for source_path in (file1_path, file2_path):
        copy_content(combined, source_path)

    combined.save(output_path)
    print(f"文档已成功合并并保存到: {output_path}")

def copy_content(target_doc, source_path):
    """Append the body content of a source document to ``target_doc``.

    The source file is opened, the page setup of each of its sections is
    mirrored onto the target, and then every top-level paragraph and table
    of the source body is re-created at the end of the target, in the
    original order.

    Only what is listed here is copied: section page setup, paragraph style
    and paragraph formatting, run text with bold/italic/underline, font
    name, font size and RGB colour, and table cell paragraphs. Anything
    else in the source (images, headers/footers, nested tables, ...) is not
    handled by this function.

    NOTE(review): style objects are taken from the source document; this
    function does not check that a style of the same id is defined in the
    target document.
    NOTE(review): all source sections are created before any content is
    copied, so for a multi-section source the content is not distributed
    across the sections it originally belonged to.
    NOTE(review): merged table cells are not re-merged in the copy.

    Args:
        target_doc: Document object that receives the copied content.
        source_path (str): Path of the source Word file.
    """
    # Local import: ``qn`` is not among the file-level imports.
    from docx.oxml.ns import qn

    source_doc = Document(source_path)
    _copy_sections(target_doc, source_doc)

    paragraph_tag = qn('w:p')
    table_tag = qn('w:tbl')

    # ``paragraphs`` and ``tables`` are built once and consumed in step with
    # the walk over the body children. Indexing ``paragraphs`` by the
    # element's position in the body is wrong as soon as a table precedes a
    # paragraph, because the body position also counts tables.
    paragraphs = iter(source_doc.paragraphs)
    tables = iter(source_doc.tables)

    for element in source_doc.element.body:
        if element.tag == paragraph_tag:
            source_paragraph = next(paragraphs, None)
            if source_paragraph is not None:
                _copy_paragraph(source_paragraph, target_doc.add_paragraph())
        elif element.tag == table_tag:
            source_table = next(tables, None)
            if source_table is not None:
                _copy_table(source_table, target_doc)


# Section properties mirrored from each source section onto the target.
_SECTION_ATTRS = (
    'orientation',
    'page_width',
    'page_height',
    'left_margin',
    'right_margin',
    'top_margin',
    'bottom_margin',
    'header_distance',
    'footer_distance',
    'gutter',
)

# Paragraph-format properties mirrored for body and table-cell paragraphs.
_PARAGRAPH_FORMAT_ATTRS = (
    'alignment',
    'left_indent',
    'right_indent',
    'first_line_indent',
    'space_before',
    'space_after',
    'line_spacing',
    'keep_together',
    'keep_with_next',
    'page_break_before',
    'widow_control',
)


def _copy_sections(target_doc, source_doc):
    """Mirror the page setup of every source section onto the target."""
    # When nothing has been written to the target yet, its existing section
    # is reused for the first source section. Adding a new section in that
    # case would leave an empty section ahead of the first copied content.
    target_is_empty = not target_doc.paragraphs and not target_doc.tables

    for index, section in enumerate(source_doc.sections):
        existing_sections = target_doc.sections
        if index == 0 and target_is_empty and len(existing_sections) > 0:
            target_section = existing_sections[-1]
            target_section.start_type = section.start_type
        else:
            target_section = target_doc.add_section(section.start_type)

        for attr in _SECTION_ATTRS:
            setattr(target_section, attr, getattr(section, attr))


def _copy_paragraph(source_paragraph, target_paragraph):
    """Copy style, paragraph formatting and runs onto ``target_paragraph``."""
    target_paragraph.style = source_paragraph.style

    source_format = source_paragraph.paragraph_format
    target_format = target_paragraph.paragraph_format
    for attr in _PARAGRAPH_FORMAT_ATTRS:
        setattr(target_format, attr, getattr(source_format, attr))

    for run in source_paragraph.runs:
        new_run = target_paragraph.add_run(run.text)
        new_run.bold = run.bold
        new_run.italic = run.italic
        new_run.underline = run.underline
        new_run.font.name = run.font.name
        new_run.font.size = run.font.size
        new_run.font.color.rgb = run.font.color.rgb


def _copy_table(source_table, target_doc):
    """Append a copy of ``source_table`` to the end of ``target_doc``."""
    new_table = target_doc.add_table(
        rows=len(source_table.rows),
        cols=len(source_table.columns),
        style=source_table.style,
    )

    for row_index, row in enumerate(source_table.rows):
        for col_index, cell in enumerate(row.cells):
            new_cell = new_table.cell(row_index, col_index)
            for para_index, paragraph in enumerate(cell.paragraphs):
                # The first source paragraph goes into the new cell's
                # existing first paragraph instead of an appended one.
                # Setting the cell text and then appending every paragraph
                # again would write the cell content twice.
                if para_index == 0 and new_cell.paragraphs:
                    target_paragraph = new_cell.paragraphs[0]
                else:
                    target_paragraph = new_cell.add_paragraph()
                _copy_paragraph(paragraph, target_paragraph)

# Example usage: merge two attachment documents into a single file.
if __name__ == "__main__":
    # Path of the first Word file.
    file1 = r"C:\Leon\python_project\oceanxecm\2025\04\20250422-AI-投标书\word\附件4_法定代表人身份证明书.docx"
    # Path of the second Word file.
    file2 = r"C:\Leon\python_project\oceanxecm\2025\04\20250422-AI-投标书\word\附件5_法定代表人授权委托书.docx"
    # Path where the merged document is written.
    output = r"C:\Leon\python_project\oceanxecm\2025\04\20250422-AI-投标书\word\merged_document.docx"

    merge_word_files(
        file1_path=file1,
        file2_path=file2,
        output_path=output,
    )