Python PDF转图片 Word

最新推荐文章于 2024-02-27 15:45:38 发布

黑盒变白盒才有意思

最新推荐文章于 2024-02-27 15:45:38 发布

阅读量1k

点赞数

文章标签： python PDF Word

本文链接：https://blog.csdn.net/qq_36497454/article/details/121664454

版权

最近有相关需求，就用 Python 做了一个 PDF 处理工具（PDF 转图片、PDF 转 Word），代码如下：

不想写代码的，可以直接使用打包好的 EXE：

https://download.csdn.net/download/qq_36497454/53542250

# main.py
# Python3.x 导入方法

from window import MainWindow

# Build the application object; its constructor creates the Tk window and
# all of its widgets.
main_window_class = MainWindow()

# get_window() hands back the Tk root window created by MainWindow.
window = main_window_class.get_window()
# Enter the message loop
window.mainloop()



# MainWindow.py
# Python3.x 导入方法
import os
from tkinter import *
from tkinter import filedialog
from imageUtil import pyMuPDF_fitz
from wordUtil import PDFtoWord


class MainWindow:
    """Tk window that turns a chosen PDF into page images or a Word file.

    The window shows two path fields (source PDF and output directory), each
    with a chooser button, plus two action buttons that pass the selected
    paths to ``pyMuPDF_fitz`` and ``PDFtoWord`` respectively.
    """

    pdf_path: object

    def __init__(self):
        """Create the Tk root window and lay out every widget."""
        # Nothing is selected until the user goes through the chooser dialogs.
        self.pdf_path = None
        self.dir_path = None
        self.window = Tk()
        # Window title
        self.window.title('PDF文件转化器')
        # Size and on-screen position
        self.window.geometry('600x400+50+100')

        # PDF row: label, path field, chooser button
        self.pdf_lable = Label(self.window, text="PDF:")
        self.pdf_lable.place(x=100, y=25)
        self.pdf_text = Text(self.window, bd=5, width=30, height=2)
        self.pdf_text.place(x=180, y=20)
        self.pdf_button = Button(self.window, text="选择PDF", width=10, command=self.set_pdf)
        self.pdf_button.place(x=440, y=20)

        # Output directory row: label, path field, chooser button
        self.dir_lable = Label(self.window, text="保存目录:")
        self.dir_lable.place(x=100, y=75)
        self.dir_text = Text(self.window, bd=5, width=30, height=2)
        self.dir_text.place(x=180, y=70)
        self.dir_button = Button(self.window, text="保存目录", width=10, command=self.set_dir)
        self.dir_button.place(x=440, y=70)

        # Conversion buttons
        self.change_image_button = Button(self.window, text="生成图片", width=10, command=self.pdf_image)
        self.change_image_button.place(x=150, y=150)

        self.change_word_button = Button(self.window, text="生成word", width=10, command=self.pdf_word)
        self.change_word_button.place(x=450, y=150)

    def get_window(self):
        """Return the Tk root window so the caller can start its main loop."""
        return self.window

    def set_pdf(self):
        """Ask for a PDF file and show the chosen path in the PDF field."""
        selected = filedialog.askopenfilename()
        if not selected:
            # The dialog was cancelled; keep whatever was selected before.
            return
        self.pdf_path = selected
        self._show_path(self.pdf_text, selected)

    def set_dir(self):
        """Ask for an output directory and show it in the directory field."""
        selected = filedialog.askdirectory()
        if not selected:
            # The dialog was cancelled; keep whatever was selected before.
            return
        self.dir_path = selected
        self._show_path(self.dir_text, selected)

    def pdf_image(self):
        """Render the selected PDF into images inside the output directory."""
        if not self._paths_selected():
            return
        pyMuPDF_fitz(self.pdf_path, self.dir_path)

    def pdf_word(self):
        """Convert the selected PDF into a Word file in the output directory."""
        if not self._paths_selected():
            return
        word_path = self._word_output_path(self.pdf_path, self.dir_path)
        PDFtoWord(self.pdf_path, word_path)

    @staticmethod
    def _show_path(text_widget, path):
        """Replace the whole content of *text_widget* with *path*."""
        text_widget.delete("1.0", END)
        text_widget.insert("1.0", path)

    @staticmethod
    def _word_output_path(pdf_path, dir_path):
        """Return ``<dir_path>/<PDF base name>.doc`` for the given PDF path.

        Only the final extension is stripped, so dots inside the base name
        survive ("report.v2.pdf" -> "report.v2.doc").
        """
        base_name = os.path.splitext(os.path.basename(pdf_path))[0]
        return os.path.join(dir_path, base_name + ".doc")

    def _paths_selected(self):
        """Return True when both paths are set; otherwise warn and return False."""
        if self.pdf_path and self.dir_path:
            return True
        # Imported here so the module's import block stays untouched.
        from tkinter import messagebox

        messagebox.showwarning("提示", "请先选择PDF文件和保存目录")
        return False




# PDFtoWord.py
from pdf2docx import Converter


def PDFtoWord(pdf_file, docx_file):
    """Convert the PDF at *pdf_file* into a Word document at *docx_file*.

    Args:
        pdf_file: Path of the source PDF, passed to ``pdf2docx.Converter``.
        docx_file: Path of the document that ``Converter.convert`` writes.

    The conversion is requested from page index 0 with no explicit end page.
    """
    cv = Converter(pdf_file)
    try:
        cv.convert(docx_file, start=0, end=None)
    finally:
        # Release the converter even when the conversion itself fails.
        cv.close()

# pyMuPDF_fitz.py

import datetime
import os
import logging

import fitz  # fitz就是pip install PyMuPDF


def pyMuPDF_fitz(pdf_path, image_path):
    """Render every page of a PDF into an image file inside *image_path*.

    Args:
        pdf_path: Path of the PDF to open with PyMuPDF (``fitz``).
        image_path: Output directory; it is created when missing.

    Each page is written as ``<PDF base name>_<page index>.jpg`` where the
    page index starts at 0.
    """
    # Strip only the final extension so dots inside the base name survive
    # ("a.b.pdf" -> "a.b").
    file_name = os.path.splitext(os.path.basename(pdf_path))[0]

    # Same zoom on both axes. Per the original author's note, 1.33333333
    # turns a 792x612 page into 1056x816, and 2 would give 1584x1224.
    zoom_x = 1.33333333
    zoom_y = 1.33333333
    rotate = 0
    # The matrix does not depend on the page, so build it once.
    mat = fitz.Matrix(zoom_x, zoom_y).preRotate(rotate)

    pdfDoc = fitz.open(pdf_path)
    try:
        # Make sure the output directory exists before writing any page.
        os.makedirs(image_path, exist_ok=True)
        for pg in range(pdfDoc.pageCount):
            pix = pdfDoc[pg].getPixmap(matrix=mat, alpha=False)
            # NOTE(review): writePNG is called with a ".jpg" file name, so the
            # extension presumably does not match the data written. The name
            # is kept because callers may rely on it - confirm before renaming.
            pix.writePNG(os.path.join(image_path, f"{file_name}_{pg}.jpg"))
    finally:
        # Close the document even when rendering or writing a page fails.
        pdfDoc.close()