如何使用Python批量docx转pdf？

最新推荐文章于 2024-06-12 12:58:00 发布

CC's_lab

最新推荐文章于 2024-06-12 12:58:00 发布

阅读量562

点赞数 9

文章标签： python pdf

本文链接：https://blog.csdn.net/m0_61947256/article/details/135367465

版权

如何使用Python批量docx转pdf？

1. 安装相关Python包

pip install -i https://pypi.tuna.tsinghua.edu.cn/simple python-docx pywin32

2. 测试单个文件转换

import os
import win32com.client
from docx import Document

def convert_to_pdf(docx_file, pdf_file):
    """Convert a single DOCX file to PDF by automating Microsoft Word over COM.

    Word opens ``docx_file`` directly and read-only, so no intermediate copy
    of the document is written to the current working directory.

    Args:
        docx_file: Path to the source .docx document.
        pdf_file: Path of the PDF file to create.

    Raises:
        Any error raised by the Word COM layer propagates to the caller, but
        only after the document has been closed and Word has been quit.
    """
    # Value of wdFormatPDF in Word's WdSaveFormat enumeration.
    wd_format_pdf = 17

    # Word resolves relative paths against its own working directory,
    # so hand it absolute paths.
    source_path = os.path.abspath(docx_file)
    target_path = os.path.abspath(pdf_file)

    word_app = win32com.client.Dispatch("Word.Application")
    try:
        word_app.Visible = False

        # Read-only: the source document is never modified by the conversion.
        doc = word_app.Documents.Open(source_path, ReadOnly=True)
        try:
            doc.SaveAs(target_path, FileFormat=wd_format_pdf)
        finally:
            # 0 == wdDoNotSaveChanges; avoids a "save changes?" prompt.
            doc.Close(SaveChanges=0)
    finally:
        # Always shut Word down, otherwise a hidden WINWORD process is left
        # running whenever the conversion fails.
        word_app.Quit()

# Run the conversion on one sample document
source_docx = r"C:\Users\lishi\Desktop\测试一.docx"
target_pdf = r"C:\Users\lishi\Desktop\测试一.pdf"
convert_to_pdf(source_docx, target_pdf)

3. 批量文件转换

import os
import win32com.client
from docx import Document

def convert_to_pdf(docx_file, pdf_file):
    """Convert a single DOCX file to PDF by automating Microsoft Word over COM.

    Word opens ``docx_file`` directly and read-only, so no intermediate copy
    of the document is written to the current working directory.

    Args:
        docx_file: Path to the source .docx document.
        pdf_file: Path of the PDF file to create. When empty or ``None``,
            the PDF is written next to ``docx_file`` with its extension
            replaced by ``.pdf``.

    Raises:
        Any error raised by the Word COM layer propagates to the caller, but
        only after the document has been closed and Word has been quit.
    """
    # Value of wdFormatPDF in Word's WdSaveFormat enumeration.
    wd_format_pdf = 17

    # Honour the caller's output path; derive one only when none was given.
    if not pdf_file:
        pdf_file = os.path.splitext(docx_file)[0] + ".pdf"

    # Word resolves relative paths against its own working directory,
    # so hand it absolute paths.
    source_path = os.path.abspath(docx_file)
    target_path = os.path.abspath(pdf_file)

    word_app = win32com.client.Dispatch("Word.Application")
    try:
        word_app.Visible = False

        # Read-only: the source document is never modified by the conversion.
        doc = word_app.Documents.Open(source_path, ReadOnly=True)
        try:
            doc.SaveAs(target_path, FileFormat=wd_format_pdf)
        finally:
            # 0 == wdDoNotSaveChanges; avoids a "save changes?" prompt.
            doc.Close(SaveChanges=0)
    finally:
        # Always shut Word down, otherwise a hidden WINWORD process is left
        # running whenever the conversion fails.
        word_app.Quit()

# Folder whose .docx files should be converted
folder_path = r"C:\Users\lishi\Desktop\test"

# Every entry in that folder
file_list = os.listdir(folder_path)

# Convert each Word document found in the folder
for file_name in file_list:
    # Skip "~$..." owner files that Word creates beside open documents, and
    # match the extension case-insensitively so ".DOCX" is picked up as well.
    if file_name.startswith("~$") or not file_name.lower().endswith(".docx"):
        continue

    # Full path of the source document
    file_path = os.path.join(folder_path, file_name)

    # Swap only the final extension; str.replace would rewrite every ".docx"
    # occurring anywhere in the path, including folder names.
    pdf_path = os.path.splitext(file_path)[0] + ".pdf"

    convert_to_pdf(file_path, pdf_path)