python操作word、pdf函数大全（持续更新）

最新推荐文章于 2024-04-30 19:24:04 发布

七刀

最新推荐文章于 2024-04-30 19:24:04 发布

阅读量1.3k

点赞数 2

分类专栏： python_Django 文章标签： python

本文链接：https://blog.csdn.net/u011321546/article/details/109536712

版权

python_Django 专栏收录该内容

4 篇文章 0 订阅

订阅专栏

本文采用的是 Django，所以入口（视图）函数是 test()；也可以把各个函数单独拿出来放到脚本中运行，不受任何影响。请自行安装相关库（python-docx、pywin32、PyPDF2）。

# -*-coding:utf-8-*-
from django.http import HttpResponse
import docx
import win32com.client as win32
from PyPDF2 import PdfFileWriter, PdfFileReader, PdfFileMerger
from win32com.client import constants, gencache
import pythoncom


# Word to PDF: works for both .doc and .docx sources
def wordToPdf(wordPath, pdfPath):
    """Export a Word document to PDF through Word's COM automation API.

    Args:
        wordPath: Path of the source Word document; it is opened read-only.
        pdfPath: Path of the PDF file to create.

    Word is told to quit and COM is uninitialized even when opening or
    exporting raises, so a failed conversion does not skip the cleanup.
    """
    pythoncom.CoInitialize()
    try:
        word = gencache.EnsureDispatch('Word.Application')
        try:
            doc = word.Documents.Open(wordPath, ReadOnly=1)
            doc.ExportAsFixedFormat(pdfPath,
                                    constants.wdExportFormatPDF,
                                    Item=constants.wdExportDocumentWithMarkup,
                                    CreateBookmarks=constants.wdExportCreateHeadingBookmarks)
        finally:
            # Quit without saving: the source document is never modified here.
            word.Quit(constants.wdDoNotSaveChanges)
    finally:
        pythoncom.CoUninitialize()


# Write several Word files into one new Word file
def addWord(files, savefile):
    """Concatenate Word documents into a new document and save it.

    Args:
        files: Iterable of paths of the Word files to merge, in order.
        savefile: Path the merged document is saved to.

    Each file is inserted at the current selection of a freshly created
    document; no page break is added between files, so the content is simply
    appended. Original author's note (not verifiable from this code): the
    input files and ``savefile`` may each be .doc or .docx.

    The merged document is closed, Word is told to quit and COM is
    uninitialized even when inserting or saving raises.
    """
    pythoncom.CoInitialize()
    try:
        word = win32.gencache.EnsureDispatch('Word.Application')  # start Word
        try:
            word.Visible = False
            output = word.Documents.Add()  # new document that receives the merge
            try:
                for file in files:
                    output.Application.Selection.InsertFile(file)  # append content
                output.SaveAs(savefile)
            finally:
                # Close without saving so a failed merge cannot block on a
                # "save changes?" prompt in the hidden Word window.
                output.Close(constants.wdDoNotSaveChanges)
        finally:
            word.Quit(constants.wdDoNotSaveChanges)
    finally:
        pythoncom.CoUninitialize()


# Remove the first page
def delWordPageFirst(file1, file2):
    """Blank the second paragraph of ``file1``, relabel it, and save as ``file2``.

    Args:
        file1: Path of the .docx file to read.
        file2: Path the modified document is written to.

    Original author's note: this is meant to delete the first page's content;
    because that also removes the first line of the second page, the line is
    written back afterwards.
    """
    document = docx.Document(file1)
    second_paragraph = document.paragraphs[1]
    second_paragraph.clear()
    second_paragraph.text = '目录'
    document.save(file2)


# Split a PDF: extract one single page from the file
def splitPdfsMiddleOnly(filename, resultname, page):
    """Write page number ``page`` of ``filename`` to a new PDF.

    Args:
        filename: Path of the source PDF.
        resultname: Path of the PDF to create.
        page: 1-based number of the page to extract.

    Both files are closed when the function returns or raises.
    """
    output = PdfFileWriter()
    # The source must stay open until the writer has emitted the page.
    with open(filename, "rb") as f:
        pdf_file = PdfFileReader(f)
        output.addPage(pdf_file.getPage(page - 1))
        with open(resultname, "wb") as outputStream:
            output.write(outputStream)


# Split a PDF: extract a range of pages from the file
def splitPdfsMiddleMore(filename, resultname, start, end):
    """Write pages ``start`` through ``end`` of ``filename`` to a new PDF.

    Args:
        filename: Path of the source PDF.
        resultname: Path of the PDF to create.
        start: 1-based number of the first page to extract.
        end: 1-based number of the last page to extract (inclusive).

    Both files are closed when the function returns or raises.
    """
    output = PdfFileWriter()
    # The source must stay open until the writer has emitted the pages.
    with open(filename, "rb") as f:
        pdf_file = PdfFileReader(f)
        for i in range(start - 1, end):
            output.addPage(pdf_file.getPage(i))
        # Opened only after all pages were collected, so a bad page number
        # does not leave an empty result file behind.
        with open(resultname, "wb") as outputStream:
            output.write(outputStream)


# Split a PDF: extract everything from page ``page`` (inclusive) to the end
def splitPdfLast(filename, resultname, page):
    """Write pages ``page`` through the last page of ``filename`` to a new PDF.

    Args:
        filename: Path of the source PDF.
        resultname: Path of the PDF to create.
        page: 1-based number of the first page to keep.

    Both files are closed when the function returns or raises.
    """
    output = PdfFileWriter()
    # The source must stay open until the writer has emitted the pages.
    with open(filename, "rb") as f:
        pdf_file = PdfFileReader(f)
        pdf_pages_len = pdf_file.getNumPages()  # total page count
        for i in range(page - 1, pdf_pages_len):
            output.addPage(pdf_file.getPage(i))
        with open(resultname, "wb") as outputStream:
            output.write(outputStream)

# Merge PDF documents
def mergePdf(pdfs, rePdf):
    """Concatenate the PDF files in ``pdfs``, in order, into ``rePdf``.

    Args:
        pdfs: Sequence of paths of the PDF files to merge.
        rePdf: Path of the merged PDF to create.

    Every file opened here is closed again, even if appending or writing
    raises.
    """
    merger = PdfFileMerger()
    opened = []  # source files must stay open until the merged PDF is written
    try:
        for path in pdfs:
            f = open(path, 'rb')
            opened.append(f)
            merger.append(fileobj=f)
        with open(rePdf, 'wb') as output:
            merger.write(output)
    finally:
        for f in opened:
            f.close()


def test(request):
    """Django view that runs the whole Word/PDF demo pipeline.

    Merges two Word files, rewrites the second paragraph, converts the result
    to PDF, cuts three excerpts out of that PDF and merges them again. All
    files live in one hard-coded directory.

    Args:
        request: The incoming Django request (not used).

    Returns:
        An ``HttpResponse`` whose body is the path of the final PDF.
    """
    # Backslashes are doubled in the path; otherwise the files may not be found.
    base_dir = 'H:\\program_w\\code\\python\\django\\file\\'
    # Raw-string spelling of the directory (each separator stays as two
    # backslashes). Unused; the original author suggests trying it when the
    # Word merge fails with the path above.
    raw_base_dir = r'H:\\program_w\\code\\python\\django\\file\\'

    def in_dir(name):
        # Build a full path inside the working directory.
        return base_dir + name

    # Inputs: originally just these two files.
    word_sources = [in_dir("model.docx"), in_dir("olddata.doc")]

    # Intermediate files.
    merged_word = in_dir("temp.docx")  # merged Word document
    trimmed_word = in_dir("tempdel.docx")  # Word document after the first-page edit
    trimmed_pdf = in_dir("tempdel.pdf")  # PDF converted from the trimmed Word file
    single_page_pdf = in_dir("tempmid1.pdf")  # one page from the middle
    page_range_pdf = in_dir("tempmid2.pdf")  # several pages from the middle
    tail_pdf = in_dir("tempmid3.pdf")  # the last pages
    excerpts = [single_page_pdf, page_range_pdf, tail_pdf]  # PDFs to merge

    final_pdf = in_dir("result.pdf")  # final result

    addWord(word_sources, merged_word)  # merge into the template
    delWordPageFirst(merged_word, trimmed_word)  # first-page removal demo
    wordToPdf(trimmed_word, trimmed_pdf)  # Word -> PDF
    splitPdfsMiddleOnly(trimmed_pdf, single_page_pdf, 2)  # page 2 only
    splitPdfsMiddleMore(trimmed_pdf, page_range_pdf, 2, 4)  # pages 2-4
    splitPdfLast(trimmed_pdf, tail_pdf, 10)  # page 10 to the end
    mergePdf(excerpts, final_pdf)  # merge the excerpts
    return HttpResponse(final_pdf)

七刀

关注

2
点赞
踩
6

收藏

觉得还不错? 一键收藏
打赏
0
评论
python操作word、pdf函数大全（持续更新）

采用的是Django，所以主函数是test()，也可以把函数但是拿出来放到脚本中运行，无任何影响！自行下载相关库！# -*-coding:utf-8-*-from django.http import HttpResponseimport docximport win32com.client as win32from PyPDF2 import PdfFileWriter, PdfFileReader, PdfFileMergerfrom win32com.client import con
复制链接

扫一扫