python实现将docx文件转化为pdf文件（windows，linux两种不同处理办法）

小太阳xss

已于 2023-05-08 15:30:22 修改

阅读量4k

点赞数 2

分类专栏：知识　文章标签：python linux windows

于 2020-04-29 18:54:36 首次发布

本文链接：https://blog.csdn.net/qq_38669698/article/details/105846197

版权

知识专栏收录该内容

15 篇文章

订阅专栏

一、在windows系统中

# -*- encoding:utf-8 -*-
"""
  windows系统下实现docx->pdf
"""

from win32com.client import Dispatch, constants, gencache


def doc2pdf(input, output):
    """Convert a Word document to PDF through Word's COM automation interface.

    Args:
        input: Path of the document passed to ``Documents.Open``.
        output: Path of the PDF passed to ``ExportAsFixedFormat``.

    Returns:
        True if the export completed, False if opening or exporting raised.

    Note:
        The parameter names shadow the ``input`` builtin; they are kept
        unchanged so that existing keyword callers keep working.
    """
    w = Dispatch('Word.Application')
    try:
        # Open the source document read-only.
        doc = w.Documents.Open(input, ReadOnly=1)
        # Export it as PDF, including markup and heading bookmarks.
        doc.ExportAsFixedFormat(output, constants.wdExportFormatPDF,
                                Item=constants.wdExportDocumentWithMarkup,
                                CreateBookmarks=constants.wdExportCreateHeadingBookmarks)
        return True
    except Exception:
        # Only ordinary errors mean "conversion failed"; KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by a bare ``except``.
        return False
    finally:
        # ``constants`` is only populated once makepy support for Word has been
        # loaded.  If it is not, the attribute lookup itself would raise here,
        # Word would never be told to quit, and the False result above would be
        # replaced by an AttributeError.  Fall back to the documented numeric
        # value of WdSaveOptions.wdDoNotSaveChanges (0) so Word always exits.
        w.Quit(getattr(constants, 'wdDoNotSaveChanges', 0))


def GenerateSupport():
    """Ensure the cached makepy wrapper module for a COM type library exists.

    The CLSID below is presumably the Microsoft Word object library and the
    remaining arguments its locale id and major/minor version -- TODO confirm
    against the installed Office version.
    """
    typelib_clsid = '{00020905-0000-0000-C000-000000000046}'
    lcid, major, minor = 0, 8, 4
    gencache.EnsureModule(typelib_clsid, lcid, major, minor)


def main():
    """Convert one hard-coded document and print whether it succeeded."""
    # Placeholder paths: replace them with real absolute paths before running.
    source_path = r'绝对地址/文件名.docx'
    target_path = r'绝对地址/文件名.pdf'
    # GenerateSupport() is intentionally not called here; it can be invoked
    # first to build the type-library wrapper.
    converted = doc2pdf(source_path, target_path)
    message = '转换成功' if converted else '转换失败'
    print(message)


# Run the conversion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()

二、在linux系统中

# -*- coding: utf-8 -*-
"""
linux platform word to pdf
"""
import subprocess
import os

try:
    from comtypes import client
except ImportError:
    client = None 

try:
    from win32com.client import constants, gencache
except ImportError:
    constants = None
    gencache = None


def doc2pdf_linux(docPath, pdfPath):
    """Convert a document to PDF by running LibreOffice in headless mode.

    Args:
        docPath: Path of the source document (the original notes say doc and
            docx are the accepted formats).
        pdfPath: Output *directory*, passed to LibreOffice as ``--outdir``.

    Raises:
        subprocess.TimeoutExpired: The conversion took longer than 30 seconds;
            the child process is killed before the exception propagates.
        subprocess.CalledProcessError: LibreOffice exited with a non-zero
            status without writing anything to stderr.
        subprocess.SubprocessError: LibreOffice wrote anything to stderr.
        FileNotFoundError: No ``libreoffice`` executable is on the PATH.
    """
    # The executable name must match the one installed on the machine; some
    # installations expose a versioned name such as ``libreoffice6.2``.
    cmd = ['libreoffice', '--headless', '--convert-to', 'pdf',
           docPath, '--outdir', pdfPath]
    # ``subprocess.run`` drains both pipes while it waits, so a chatty child
    # cannot fill a pipe buffer and deadlock (which ``Popen.wait`` with PIPE
    # can), and it kills the child when the 30-second limit is exceeded.
    result = subprocess.run(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, timeout=30)
    if result.stderr:
        # Same contract as before: any stderr output is reported as a failure.
        raise subprocess.SubprocessError(result.stderr)
    if result.returncode != 0:
        # A failing exit status with empty stderr used to pass silently.
        raise subprocess.CalledProcessError(result.returncode, cmd,
                                            output=result.stdout,
                                            stderr=result.stderr)


def doc2pdf(docPath, pdfPath):
    """Convert a Word document to PDF with whichever backend is available.

    When the optional Windows COM modules could not be imported, the work is
    delegated to ``doc2pdf_linux``; otherwise Word is driven through COM.

    Args:
        docPath: Path of the source document; it is made absolute before use.
        pdfPath: On the LibreOffice path this is the output directory only
            (no file name).  On the COM path it is passed to
            ``ExportAsFixedFormat`` as its first argument -- NOTE(review):
            that presumably has to be a full output file path; confirm.

    Returns:
        Whatever ``doc2pdf_linux`` returns on the LibreOffice path, otherwise
        None.
    """
    # Absolute path: avoids Word resolving a relative name against its own
    # working directory (e.g. windows/system32).
    docPathTrue = os.path.abspath(docPath)
    # The COM branch below needs ``gencache`` and ``constants`` from win32com,
    # not just comtypes' ``client``; if any of them failed to import, use the
    # LibreOffice backend instead of crashing on a None attribute access.
    if client is None or gencache is None or constants is None:
        return doc2pdf_linux(docPathTrue, pdfPath)
    word = gencache.EnsureDispatch('Word.Application')
    try:
        doc = word.Documents.Open(docPathTrue, ReadOnly=1)
        doc.ExportAsFixedFormat(pdfPath,
                                constants.wdExportFormatPDF,
                                Item=constants.wdExportDocumentWithMarkup,
                                CreateBookmarks=constants.wdExportCreateHeadingBookmarks)
    finally:
        # Always shut Word down, even if opening or exporting failed, so no
        # orphaned Word process is left behind.
        word.Quit(constants.wdDoNotSaveChanges)


# Example invocation when run as a script.
if __name__ == '__main__':
    # Source document to convert.
    wordpath='/myproject/jsg/media/attached/测试文档.docx'
    # Output location: a directory, with no file name.
    pdfpath='/myproject/jsg/media/attached'
    doc2pdf(wordpath,pdfpath)