文件转换的小工具-CSDN博客

本文链接：https://blog.csdn.net/maxiluo/article/details/138525390

概述

在处理一个客户的交付文件的时候，客户要求转换为带水印的pdf版本，尝试使用wps会员和市面上一些工具批量转换之后，在客户的麒麟系统上看不到水印，由于文件巨多，就整理了一个小工具。

功能

目前有4个功能：doc转docx、docx转pdf、pdf加水印、docx加水印（docx加水印所用的 spire.doc 库需要付费，未付费时转出的文件会多一行字）

代码

main.py

from tkinter import messagebox
import window
import os
import tkinter as tk


def show_popup(title, message):
    """Show a modal information dialog.

    A temporary, hidden Tk root is created to own the dialog and is destroyed
    again once the dialog has been dismissed.

    Args:
        title: Text for the dialog's title bar.
        message: Text shown in the dialog body.
    """
    root = tk.Tk()
    root.withdraw()  # keep the helper root window invisible
    try:
        messagebox.showinfo(title, message)
    finally:
        # Destroy the helper root even if showing the dialog fails, so no
        # hidden window is left behind.
        root.destroy()


def is_null(obj, massage):
    """Report whether *obj* is empty, alerting the user when it is.

    Args:
        obj: Value to check; any falsy value counts as empty.
        massage: Text shown in the pop-up when *obj* is empty (the parameter
            name is kept as spelled so existing callers keep working).

    Returns:
        True when *obj* is empty (after the pop-up has been shown),
        otherwise False.
    """
    if obj:
        return False
    show_popup("文件转换工具&Mashiro", massage)
    return True


def file_or_directory(path):
    """Classify *path* as a file or a directory.

    Args:
        path: Path chosen by the user.

    Returns:
        True if *path* is an existing file, False if it is an existing
        directory, and None otherwise. In the None case a pop-up tells the
        user what is wrong.
    """
    try:
        if os.path.isfile(path):
            return True
        if os.path.isdir(path):
            return False
    except (TypeError, ValueError):
        # Raised for values that are not usable as a path at all.
        show_popup("文件转换工具&Mashiro", "文件或目录错误")
        return None
    if not path:
        show_popup("文件转换工具&Mashiro", "先选择文件或目录")
    else:
        # A non-empty path that is neither a file nor a directory used to be
        # ignored silently; tell the user instead.
        show_popup("文件转换工具&Mashiro", "文件或目录错误")
    return None


def file_in_dic(directory_path):
    """Collect the paths of all files below *directory_path*.

    The tree is walked recursively with ``os.walk``. Files whose name starts
    with ``~$`` are left out.

    Args:
        directory_path: Root directory to walk.

    Returns:
        A list of file paths, each built by joining the containing folder
        with the file name, in the order ``os.walk`` yields them.
    """
    return [
        os.path.join(folder, name)
        for folder, _subfolders, names in os.walk(directory_path)
        for name in names
        if not name.startswith("~$")
    ]


def file_type(file_path):
    """Return the supported document type of *file_path*.

    The decision is based on the file extension only and ignores case, so
    ``REPORT.DOCX`` is recognised just like ``report.docx``.

    Args:
        file_path: Path or file name to inspect.

    Returns:
        ``"doc"``, ``"docx"`` or ``"pdf"``. For any other extension a pop-up
        is shown and None is returned.
    """
    extension = os.path.splitext(file_path)[1].lower()
    supported = {".doc": "doc", ".docx": "docx", ".pdf": "pdf"}
    kind = supported.get(extension)
    if kind is None:
        show_popup("文件转换工具&Mashiro", "转换文件的格式需仅为‘doc’、‘docx’、‘pdf’")
    return kind


if __name__ == '__main__':
    # Window.__init__ builds the UI and runs the Tk main loop, so this call
    # blocks until the window is closed. The instance is deliberately not
    # bound to the name ``window``: that would shadow the imported module.
    window.Window()

window.py

import os.path
import tkinter as tk
from tkinter import filedialog
import win32com.client
import main
import pdf
import word
import time


class Window:
    """Main window of the file-conversion tool.

    Creating an instance builds every widget and then enters the Tk main
    loop, so the constructor only returns once the window has been closed.
    """

    # Radio-button value -> (file type the operation accepts, status text
    # shown when the selected file has a different type).
    _OPERATIONS = {
        '1': ('doc', "文件类型错误，仅为doc文件"),    # doc -> docx
        '2': ('docx', "文件类型错误，仅为docx文件"),  # docx -> pdf
        '3': ('pdf', "文件类型错误，仅为pdf文件"),    # watermark a pdf
        '4': ('docx', "文件类型错误，仅为docx文件"),  # watermark a docx
    }

    def __init__(self):
        """Build the widgets and run the Tk main loop."""
        # Top-level window.
        self.root = tk.Tk()
        self.root.title("文件转换工具&Mashiro")
        self.root.geometry("600x250")
        # Five stacked frames, one per group of controls.
        frame1 = tk.Frame(self.root)
        frame1.pack(fill="both", expand=True)
        frame2 = tk.Frame(self.root)
        frame2.pack(fill="both", expand=True)
        frame3 = tk.Frame(self.root)
        frame3.pack(fill="both", expand=True)
        frame4 = tk.Frame(self.root)
        frame4.pack(fill="both", expand=True)
        frame5 = tk.Frame(self.root)
        frame5.pack(fill="both", expand=True)

        # Frame 1: input selection (single file or whole directory).
        self.button1_1 = tk.Button(frame1, text="选择文件", command=self.set_in_path_file)
        self.button1_1.grid(row=0, column=0, sticky="w")
        self.button1_2 = tk.Button(frame1, text="选择目录", command=self.set_in_path_directory)
        self.button1_2.grid(row=0, column=1, sticky="w")
        self.in_path = tk.StringVar()
        self.label1 = tk.Label(frame1, textvariable=self.in_path)
        self.label1.grid(row=1, column=0, sticky="w", columnspan=5)

        # Frame 2: operation selection; the values are the keys of
        # _OPERATIONS.
        self.var = tk.StringVar()
        self.var.set('1')
        self.label2_1 = tk.Label(frame2, text="操作：")
        self.label2_1.grid(row=0, column=0, sticky="w")
        radiobutton2_1 = tk.Radiobutton(frame2, text='doc转docx', variable=self.var, value='1')
        radiobutton2_1.grid(row=0, column=1, sticky="")
        radiobutton2_2 = tk.Radiobutton(frame2, text='docx转为pdf', variable=self.var, value='2')
        radiobutton2_2.grid(row=0, column=2, sticky="")
        radiobutton2_3 = tk.Radiobutton(frame2, text='pdf加水印', variable=self.var, value='3')
        radiobutton2_3.grid(row=0, column=3, sticky="")
        radiobutton2_4 = tk.Radiobutton(frame2, text='docx加水印', variable=self.var, value='4')
        radiobutton2_4.grid(row=0, column=4, sticky="")

        # Frame 3: watermark parameters (text, font size, layout).
        # The watermark-text label is kept in a local: ``self.label3`` has
        # always ended up bound to the font-size label created below.
        watermark_label = tk.Label(frame3, text="水印文本：")
        watermark_label.grid(row=0, column=0, sticky="w")
        self.watermark = tk.StringVar()
        self.watermark.set("")
        self.entry1 = tk.Entry(frame3, textvariable=self.watermark, width=30)
        self.entry1.grid(row=0, column=1, sticky="w")
        self.label3 = tk.Label(frame3, text="字号：")
        self.label3.grid(row=0, column=2, sticky="w")
        options1 = [36, 40, 44, 48, 54, 60, 66, 72, 80, 90, 96, 105, 120, 144]
        self.size = tk.StringVar()
        self.size.set(options1[0])
        self.option_menu1 = tk.OptionMenu(frame3, self.size, *options1)
        self.option_menu1.grid(row=0, column=3, sticky="w")
        self.label4 = tk.Label(frame3, text="版式：")
        self.label4.grid(row=0, column=4, sticky="w")
        options2 = ["水平", "斜式"]
        self.format = tk.StringVar()
        self.format.set(options2[1])
        self.option_menu2 = tk.OptionMenu(frame3, self.format, *options2)
        self.option_menu2.grid(row=0, column=5, sticky="w")

        # Frame 4: output directory.
        self.button4_1 = tk.Button(frame4, text="输出路径", command=self.set_out_path_directory)
        self.button4_1.grid(row=0, column=0, sticky="w")
        self.out_path = tk.StringVar()
        self.label4_1 = tk.Label(frame4, textvariable=self.out_path)
        self.label4_1.grid(row=1, column=0)

        # Frame 5: start button and status line.
        self.button5_1 = tk.Button(frame5, text="开始转换", command=self.start)
        self.button5_1.grid(row=0, column=0, sticky="ew")
        self.finish = tk.StringVar()
        self.label5_1 = tk.Label(frame5, textvariable=self.finish)
        self.label5_1.grid(row=0, column=1)

        # Blocks until the window is closed.
        self.root.mainloop()

    def set_in_path_file(self):
        """Ask for an input file; keep the old value if the dialog is cancelled."""
        file_path = filedialog.askopenfilename()
        if file_path:
            self.in_path.set(file_path)

    def set_in_path_directory(self):
        """Ask for an input directory; keep the old value if cancelled."""
        directory_path = filedialog.askdirectory()
        if directory_path:
            self.in_path.set(directory_path)

    def set_out_path_directory(self):
        """Ask for the output directory; keep the old value if cancelled."""
        directory_path = filedialog.askdirectory()
        if directory_path:
            self.out_path.set(directory_path)

    def finsh_or_error(self, result):
        """Show the outcome of a conversion in the status line.

        Args:
            result: Truthy when the conversion succeeded.
        """
        if result:
            self.finish.set("转换完成，保存在：" + self.out_path.get())
        else:
            self.finish.set("转换失败")

    def file_path_handing(self):
        """Return the output path, without extension, for the selected file.

        The result is the output directory joined with the input file's base
        name; the converters append the target extension themselves.
        """
        one = self.out_path.get() + '/' + os.path.basename(self.in_path.get())
        two = os.path.splitext(one)
        return two[0]

    def dic_path_handing(self, input_file_path):
        """Return the output path, without extension, for a file in a directory run.

        The file's location relative to the selected input directory is
        reproduced below the output directory.

        Args:
            input_file_path: Path of a file below the selected input directory.
        """
        one = os.path.relpath(input_file_path, self.in_path.get())
        two = self.out_path.get() + '/' + one
        three = os.path.splitext(two)
        return three[0]

    def start(self):
        """Validate the selections and run the chosen operation.

        Nothing is done until both an input and an output path are set. A
        single input file is converted once. For a directory, every file
        found by ``main.file_in_dic`` is processed in turn, and the run stops
        at the first file whose type does not fit the operation.
        """
        in_path = self.in_path.get()
        if main.is_null(in_path, "请选择要转换的文件或目录"):
            return
        if main.is_null(self.out_path.get(), "请选择输出路径"):
            return
        operation = self.var.get()

        if main.file_or_directory(in_path):  # a single file
            self._convert(operation, in_path, self.file_path_handing(), True)
            return

        # A directory: the watermark template only has to be written once,
        # right before the first PDF is stamped.
        template_needed = True
        for file in main.file_in_dic(in_path):
            output_stem = self.dic_path_handing(file)
            if not self._convert(operation, file, output_stem, template_needed):
                return
            template_needed = False
            # Pause kept from the original implementation; presumably it
            # gives Word time to shut down before the next instance is
            # started - TODO confirm.
            time.sleep(1)

    def _convert(self, operation, input_file_path, output_stem, build_template):
        """Run one operation on one file and report the outcome.

        Args:
            operation: Radio-button value, a key of ``_OPERATIONS``.
            input_file_path: File to process.
            output_stem: Output path without extension.
            build_template: Whether the PDF watermark template must be
                (re)created before stamping; only used by operation '3'.

        Returns:
            False when nothing was attempted (unknown operation or wrong file
            type), True when the conversion ran. Success or failure of the
            conversion itself is shown in the status line.
        """
        spec = self._OPERATIONS.get(operation)
        if spec is None:
            return False
        expected_type, mismatch_message = spec
        if main.file_type(input_file_path) != expected_type:
            self.finish.set(mismatch_message)
            return False

        if operation == '1':  # doc -> docx
            result = self._run_with_word(word.doc_to_docx, input_file_path, output_stem)
        elif operation == '2':  # docx -> pdf
            result = self._run_with_word(word.docx_to_pdf, input_file_path, output_stem)
        elif operation == '3':  # watermark a pdf
            if build_template:
                pdf.creat_watermark(self.watermark.get(), self.format.get(), self.size.get())
            result = pdf.pdf_add_watermark(input_file_path, output_stem)
        else:  # '4': watermark a docx
            result = word.word_add_watermark(input_file_path, output_stem, self.watermark.get(),
                                             self.format.get(), self.size.get())
        self.finsh_or_error(result)
        return True

    @staticmethod
    def _run_with_word(converter, input_file_path, output_stem):
        """Call *converter* with a fresh Word instance and always quit Word.

        Args:
            converter: ``word.doc_to_docx`` or ``word.docx_to_pdf``.
            input_file_path: File to convert.
            output_stem: Output path without extension.

        Returns:
            Whatever *converter* returns.
        """
        word_object = win32com.client.Dispatch("Word.Application")
        try:
            return converter(input_file_path, output_stem, word_object)
        finally:
            word_object.Quit()

pdf.py

from PyPDF2 import PdfWriter, PdfReader
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfbase import pdfmetrics
import os


def pdf_add_watermark(input_file_path, output_dic_path, pdf='pdf'):
    """Stamp the watermark template onto every page of a PDF.

    The template is the first page of ``watermark.pdf``, looked up relative
    to the current working directory; it must exist before this is called.

    Args:
        input_file_path: PDF to read.
        output_dic_path: Output path without extension.
        pdf: Extension appended to *output_dic_path* (without the dot).

    Returns:
        True on success. Any failure is logged and reported as False; no
        exception reaches the caller.
    """
    try:
        output_file_path = output_dic_path + '.' + pdf
        input_pdf = PdfReader(input_file_path)
        output_pdf = PdfWriter()
        watermark = PdfReader("watermark.pdf").pages[0]
        # Overlay the template on each page and queue the page for output.
        for page in input_pdf.pages:
            page.merge_page(watermark)
            output_pdf.add_page(page)
        # Create the target folder if needed. A path without a folder part
        # refers to the working directory, which needs no creating.
        output_directory = os.path.dirname(output_file_path)
        if output_directory:
            os.makedirs(output_directory, exist_ok=True)
        with open(output_file_path, "wb") as output:
            output_pdf.write(output)
        return True
    except Exception:
        # Callers only look at the boolean, so keep the best-effort contract
        # but leave a trace of what went wrong.
        import logging
        logging.exception("Failed to add watermark to %s", input_file_path)
        return False


def creat_watermark(watermark_text, format, size, font_path="simsun.ttc"):
    """Write the one-page watermark template ``watermark.pdf``.

    The file is created relative to the current working directory with a
    letter-sized page.

    Args:
        watermark_text: Text to draw.
        format: Layout name. ``"水平"`` draws the text unrotated; every other
            value results in a 45 degree rotation.
        size: Font size; converted with ``int``.
        font_path: TrueType font file registered under the name
            ``CustomFont`` and used for the text.
    """
    rotation = 0 if format == "水平" else 45
    template = canvas.Canvas("watermark.pdf", pagesize=letter)
    pdfmetrics.registerFont(TTFont('CustomFont', font_path))
    template.setFont("CustomFont", int(size))
    # Mid-gray fill; the second argument is passed as the alpha value.
    template.setFillGray(0.5, 0.5)
    template.rotate(rotation)
    # The drawing position is given in the (possibly rotated) coordinates.
    template.drawString(270, 80, watermark_text)
    template.save()

word.py

from docx import Document
from spire.doc import *
from spire.doc.common import *


def docx_to_pdf(input_file_path, output_dic_path, word_object, word='word', pdf='pdf'):
    """Convert a Word document to PDF through a Word application object.

    Args:
        input_file_path: Document to open.
        output_dic_path: Output path without extension.
        word_object: Word application object; only ``Documents.Open`` is
            used. The caller stays responsible for quitting it.
        word: Unused; kept for interface compatibility.
        pdf: Extension appended to *output_dic_path* (without the dot).

    Returns:
        True on success. Any failure is logged and reported as False; no
        exception reaches the caller.
    """
    # Imported locally: this module has no ``import os`` of its own and would
    # otherwise depend on a wildcard import happening to provide the name.
    import logging
    import os

    try:
        output_file_path = output_dic_path + '.' + pdf
        output_directory = os.path.dirname(output_file_path)
        if output_directory:
            os.makedirs(output_directory, exist_ok=True)
        document = word_object.Documents.Open(input_file_path)
        try:
            document.SaveAs(output_file_path, FileFormat=17)  # 17 = PDF
        finally:
            # Close the document even when saving fails so it does not stay
            # open inside the Word instance.
            document.Close()
        return True
    except Exception:
        logging.exception("Failed to convert %s to pdf", input_file_path)
        return False


def doc_to_docx(input_file_path, output_dic_path, word_object, doc='doc', docx='docx'):
    """Convert a legacy .doc document to .docx through a Word application object.

    Args:
        input_file_path: Document to open.
        output_dic_path: Output path without extension.
        word_object: Word application object; only ``Documents.Open`` is
            used. The caller stays responsible for quitting it.
        doc: Unused; kept for interface compatibility.
        docx: Extension appended to *output_dic_path* (without the dot).

    Returns:
        True on success. Any failure is logged and reported as False; no
        exception reaches the caller.
    """
    # Imported locally: this module has no ``import os`` of its own and would
    # otherwise depend on a wildcard import happening to provide the name.
    import logging
    import os

    try:
        output_file_path = output_dic_path + '.' + docx
        output_directory = os.path.dirname(output_file_path)
        if output_directory:
            os.makedirs(output_directory, exist_ok=True)
        # A separate local name keeps the ``doc`` parameter intact.
        document = word_object.Documents.Open(input_file_path)
        try:
            document.SaveAs(output_file_path, FileFormat=16)  # 16 = docx
        finally:
            # Close the document even when saving fails so it does not stay
            # open inside the Word instance.
            document.Close()
        return True
    except Exception:
        logging.exception("Failed to convert %s to docx", input_file_path)
        return False


def word_add_watermark(input_file_path, output_dic_path, watermark_text, format, size, docx='docx'):
    """Add a text watermark to a .docx file and save the result.

    Args:
        input_file_path: Document to load.
        output_dic_path: Output path without extension.
        watermark_text: Text of the watermark.
        format: Layout name: ``"水平"`` for horizontal, ``"斜式"`` (the value
            offered by the UI) or ``"斜体"`` for diagonal. Any other value
            leaves the library's default layout in place.
        size: Font size; converted with ``int``.
        docx: Extension appended to *output_dic_path* (without the dot).

    Returns:
        True on success. Any failure is logged and reported as False; no
        exception reaches the caller.
    """
    # Imported locally: this module has no ``import os`` of its own and would
    # otherwise depend on a wildcard import happening to provide the name.
    import logging
    import os

    try:
        document = Document()
        try:
            document.LoadFromFile(input_file_path)
            # Describe the watermark.
            txt_watermark = TextWatermark()
            txt_watermark.Text = watermark_text
            txt_watermark.FontSize = int(size)
            txt_watermark.Color = Color.get_Gray()
            if format == "水平":
                txt_watermark.Layout = WatermarkLayout.Horizontal
            elif format in ("斜式", "斜体"):
                txt_watermark.Layout = WatermarkLayout.Diagonal
            # NOTE(review): the other properties are capitalised, so this may
            # have been meant as ``FontName``; left unchanged because fixing
            # it would alter the rendered font - confirm against the library.
            txt_watermark.fontName = "Arial"
            # Attach the watermark to the document.
            document.Watermark = txt_watermark
            # Save the result, creating the target folder if needed.
            output_file_path = output_dic_path + '.' + docx
            output_directory = os.path.dirname(output_file_path)
            if output_directory:
                os.makedirs(output_directory, exist_ok=True)
            document.SaveToFile(output_file_path, FileFormat.Docx)
        finally:
            # Release the document even when loading or saving fails.
            document.Close()
        return True
    except Exception:
        logging.exception("Failed to add watermark to %s", input_file_path)
        return False