pdf填写工具

需求:需要根据pdf模板批量生成pdf,并把数据填入其中。数据以excel格式(xlsx,xls)提供。

程序以图形界面(GUI)的形式提供。

需要库:PyPDF2,xlrd,reportlab,wxpython,configparser,pywin32(界面文件中使用了win32con和win32comext)。

思路:主要利用了pdf文档的注释文本功能,读取pdf特定的文本注释框文本及位置信息,为pdf模板定位需要写入的内容位置。

不多说,直接上代码。

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2020/4/28 16:51
# @Author  : Alfie Liang
# @Site    : 
# @File    : PDF_Reminder.py
# @Software: PyCharm
from threading import Thread
# from multiprocessing import Process
import xlrd
from PyPDF2 import PdfFileWriter, PdfFileReader
from PyPDF2.generic import Destination
import io
import re
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
import configparser
from wx import MessageBox
import traceback
# import os

class config:
    """Loader for the tool's INI-style settings file."""

    def config(self):
        """Read ``config.cfg`` from the current directory and return the parser.

        Returns:
            The ``configparser.ConfigParser`` that was asked to read the file.
            If anything raises while loading, the traceback is shown in a
            message box and the method falls through, returning ``None``.
        """
        try:
            # Relative Windows-style path: resolved against the working directory.
            settings_path = r".\config.cfg"

            parser = configparser.ConfigParser()
            parser.read(settings_path)
        except Exception:
            MessageBox("{}".format(traceback.format_exc()))
        else:
            return parser
class PDF_Reminder():
    """Fill PDF templates with rows taken from Excel workbooks.

    A template marks each insertion point with a text annotation whose
    content is ``{n}``; ``{1}`` maps to the first worksheet column, ``{2}``
    to the second, and so on.  One output PDF is written per data row.
    """

    # Highest placeholder number that is recognised ({1} .. {MAX_COLUMNS}).
    MAX_COLUMNS = 60
    # Placeholder label -> zero-based worksheet column index.
    _PLACEHOLDER_COLUMNS = {"{%s}" % n: n - 1 for n in range(1, MAX_COLUMNS + 1)}

    def __init__(self, pdf_filepath1, pdf_filepath2, event):
        """Remember the data files and the button that triggered the export.

        Args:
            pdf_filepath1: Path of the workbook used with template 1
                ('' when none was chosen).
            pdf_filepath2: Path of the workbook used with template 2
                ('' when none was chosen).
            event: The wx event of the click; its source widget is disabled
                while an export is running.
        """
        self.btn = event.GetEventObject()
        self.pdf_filepath1 = pdf_filepath1
        self.pdf_filepath2 = pdf_filepath2
        self.config = config().config()
        self.outputfilepath1 = ''
        self.outputfilepath2 = ''

    @classmethod
    def _column_index(cls, label):
        """Return the zero-based column for a ``{n}`` label, or None if invalid."""
        return cls._PLACEHOLDER_COLUMNS.get(label)

    def _collect_placeholders(self, template_path):
        """Map page index -> column index -> list of (x, y) anchor points.

        The anchor is the first two numbers of each annotation's ``/Rect``.
        A failure while scanning one page is reported and the scan continues
        with the next page.
        """
        placeholders = {}
        with open(template_path, "rb") as template_stream:
            reader = PdfFileReader(template_stream)
            for page_index in range(reader.getNumPages()):
                page = reader.getPage(page_index)
                try:
                    if "/Annots" not in page:
                        continue
                    for annot_ref in page['/Annots']:
                        annot = annot_ref.getObject()
                        if '/Contents' not in annot:
                            continue
                        column = self._column_index(annot['/Contents'])
                        if column is None:
                            continue
                        rect = annot['/Rect']
                        # Convert now so nothing refers to the closed stream later.
                        anchor = (float(rect[0]), float(rect[1]))
                        placeholders.setdefault(page_index, {}) \
                                    .setdefault(column, []).append(anchor)
                except Exception:
                    MessageBox("出现错误:{}".format(traceback.format_exc()))
                    print(traceback.format_exc())
        return placeholders

    def _build_overlays(self, placeholders, row):
        """Render one single-page overlay PDF per annotated template page."""
        overlays = {}
        for page_index, columns in placeholders.items():
            packet = io.BytesIO()
            can = canvas.Canvas(packet, pagesize=letter)
            for column, anchors in columns.items():
                text = "{}".format(row[column])
                for x, y in anchors:
                    can.drawString(x, y, text)
            can.save()
            packet.seek(0)
            overlays[page_index] = PdfFileReader(packet)
        return overlays

    def write_word_for_PDF(self, pdf_model1_filepath, outputfilepath, data_filepath=None):
        """Generate one filled PDF per data row of a workbook.

        Args:
            pdf_model1_filepath: Path of the PDF template to fill.
            outputfilepath: Directory receiving the generated files; each file
                is named after the third worksheet column of its row.
            data_filepath: Workbook to read.  Defaults to ``self.pdf_filepath1``
                so existing two-argument callers keep working.

        Errors are reported through a message box; the success message is
        shown only when every row was written.
        """
        # NOTE(review): run() starts this method on a worker thread, yet it
        # touches wx widgets and opens message boxes directly.  wxPython expects
        # GUI calls on the main thread -- consider wx.CallAfter.  Likewise the
        # button is re-enabled by whichever worker finishes first.
        if data_filepath is None:
            data_filepath = self.pdf_filepath1

        self.btn.Disable()
        succeeded = False
        try:
            placeholders = self._collect_placeholders(pdf_model1_filepath)

            # NOTE(review): xlrd 2.x reads only .xls; .xlsx/.xlsm need xlrd < 2
            # or another reader -- confirm against the installed version.
            wb = xlrd.open_workbook(r"{}".format(data_filepath))
            sheet = wb.sheet_by_index(0)

            # Row 0 is skipped (the loop starts at 1), i.e. treated as a header.
            for row_index in range(1, sheet.nrows):
                row = sheet.row_values(row_index)
                # Overlays are rebuilt for every row so each output carries
                # its own row's values.
                overlays = self._build_overlays(placeholders, row)

                # mergePage mutates the template pages, so the template is
                # re-read for each row; the stream must stay open until the
                # writer has produced its output.
                with open(r"{}".format(pdf_model1_filepath), "rb") as template_stream:
                    template = PdfFileReader(template_stream)
                    output = PdfFileWriter()
                    for page_index in range(template.numPages):
                        page = template.getPage(page_index)
                        if page_index in overlays:
                            page.mergePage(overlays[page_index].getPage(0))
                        output.addPage(page)

                    # Strip the links/annotations (including the placeholders).
                    output.removeLinks()
                    target = r"{}\{}.pdf".format(outputfilepath, row[2])
                    with open(target, "wb") as output_stream:
                        output.write(output_stream)
            succeeded = True
        except Exception:
            MessageBox("出现错误:{}".format(traceback.format_exc()))
        finally:
            self.btn.Enable()

        if succeeded:
            MessageBox("导出成功请到相应文件路径:{}检查pdf文件。".format(outputfilepath))

    def run(self):
        """Start one export thread per selected data file.

        Template and output locations come from the ``PDF_Frame`` section of
        the configuration.  When neither data file is set, the user is asked
        to import one and nothing is started.
        """
        jobs = [
            (self.pdf_filepath1, "pdf_model1_filepath", "output_filepath1"),
            (self.pdf_filepath2, "pdf_model2_filepath", "output_filepath2"),
        ]
        try:
            pending = [job for job in jobs if job[0] != '']
            if not pending:
                MessageBox("烦请导入数据文件!")
                return

            for data_filepath, model_option, output_option in pending:
                worker = Thread(target=self.write_word_for_PDF, args=(
                    self.config.get("PDF_Frame", model_option),
                    self.config.get("PDF_Frame", output_option),
                    data_filepath))
                worker.start()
        except Exception:
            print(traceback.format_exc())
            MessageBox("出现错误:{}".format(traceback.format_exc()))

 界面文件

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2020/4/28 16:49
# @Author  : Alfie Liang
# @Site    : 
# @File    : Frame.py
# @Software: PyCharm
import wx
import multiprocessing
import win32con
from win32comext.shell.shell import ShellExecuteEx
from win32comext.shell import shellcon
from PDF_Reminder import PDF_Reminder

class Frame(wx.Frame):
    """Fixed-size top-level window hosting the notebook and a File menu."""

    def __init__(self):
        """Build the window, its notebook, and the File > setting menu entry."""
        window_style = wx.CAPTION | wx.MINIMIZE_BOX | wx.CLOSE_BOX | wx.SYSTEM_MENU
        wx.Frame.__init__(self, None, -1, 'PDF Transfer ', size=(560, 290),
                          style=window_style)

        self.panel = wx.Panel(self, -1)
        self.mnotebook = MyNotebook(self.panel)
        self.mnotebook.Show()

        # Let the notebook fill the whole panel.
        layout = wx.BoxSizer(wx.VERTICAL)
        layout.Add(self.mnotebook, 1, wx.EXPAND)
        self.panel.SetSizer(layout)

        menu_bar = wx.MenuBar()
        file_menu = wx.Menu()
        # Id 2 is what menuHandler checks for.
        file_menu.Append(
            wx.MenuItem(file_menu, id=2, text='setting', kind=wx.ITEM_NORMAL))
        menu_bar.Append(file_menu, "&File")

        self.Bind(wx.EVT_MENU, self.menuHandler)
        self.SetMenuBar(menu_bar)
        self.Show()
        self.Center()

    def menuHandler(self, event):
        """Open ``config.cfg`` via the shell when the 'setting' item is chosen."""
        if event.GetId() != 2:
            return

        ShellExecuteEx(nShow=win32con.SW_SHOWNORMAL,
                       fMask=shellcon.SEE_MASK_NOCLOSEPROCESS,
                       lpVerb='open',
                       lpFile=r".\config.cfg",
                       lpParameters='')
class MyNotebook(wx.Notebook):
    """Notebook control that carries the tool's single "PDF_Transfer" tab."""

    def __init__(self, parent):
        """Create the notebook under *parent* and attach its only page."""
        self.parent = parent
        wx.Notebook.__init__(self, parent)

        first_tab = MyPage1(self)
        self.page1 = first_tab
        self.AddPage(first_tab, "PDF_Transfer")




class MyPage1(wx.Panel):
    """Notebook page for picking up to two Excel data files and exporting."""

    # File-dialog filter shared by both import buttons.
    _EXCEL_WILDCARD = ("EXCEL文件(*.xlsx;*.xls;*.xlsm)|*.xlsx;*.xls;*.xlsm|"
                       "EXCEL工作簿(*.xlsx)|*.xlsx|"
                       "EXCEL工作簿(*.xls)|*.xls|"
                       "启用宏的EXCEL工作簿(*.xlsm)|*.xlsm")

    def __init__(self, parent):
        """Lay out the two path fields, their import buttons and Generate."""
        wx.Panel.__init__(self, parent)

        panel = wx.Panel(self)
        panel.SetBackgroundColour("white")

        wx.StaticText(self, -1, "导入pdf模板文件路径", pos=(70, 25), style=wx.NO_BORDER)
        self.textctrl1 = wx.TextCtrl(self, -1, u'', size=(300, 20), pos=(70, 50),
                                     style=wx.TE_READONLY | wx.TE_MULTILINE)
        self.import_bt = wx.Button(self, -1, "import", pos=(400, 50), size=(70, 20))
        wx.StaticText(self, -1, "导入pdf模板文件路径", pos=(70, 90), style=wx.NO_BORDER)
        self.textctrl2 = wx.TextCtrl(self, -1, u'', size=(300, 20), pos=(70, 120),
                                     style=wx.TE_READONLY | wx.TE_MULTILINE)
        self.import_bt2 = wx.Button(self, -1, "import", pos=(400, 120), size=(70, 20))

        self.run_bt = wx.Button(self, -1, "Generate", pos=(220, 160), size=(70, 30))

        self.import_bt.Bind(wx.EVT_BUTTON, self.inputbutton1)
        self.import_bt2.Bind(wx.EVT_BUTTON, self.inputbutton2)
        # Double-clicking a path field clears it.
        self.textctrl1.Bind(wx.EVT_LEFT_DCLICK, self.cleaninput1file)
        self.textctrl2.Bind(wx.EVT_LEFT_DCLICK, self.cleaninput2file)
        self.run_bt.Bind(wx.EVT_BUTTON, self.run_bt_fun)

        self.Show()
        self.Center()

    def cleaninput1file(self, event):
        """Clear the first data-file path."""
        self.textctrl1.Clear()

    def cleaninput2file(self, event):
        """Clear the second data-file path."""
        self.textctrl2.Clear()

    def run_bt_fun(self, evnet):
        """Start the export with whatever paths are currently in the fields."""
        pdf_filepath1 = self.textctrl1.GetValue()
        pdf_filepath2 = self.textctrl2.GetValue()
        print(pdf_filepath1, pdf_filepath2)
        PDF_Reminder(pdf_filepath1, pdf_filepath2, evnet).run()

    def _choose_data_file(self, event, target):
        """Ask for an Excel file and store the chosen path in *target*.

        The clicked button is disabled while the dialog is open and is always
        re-enabled afterwards; the dialog is destroyed once it has been used.
        """
        btn = event.GetEventObject()
        btn.Disable()
        try:
            file_dir = wx.FileDialog(None, message="选择单个文件",
                                     wildcard=self._EXCEL_WILDCARD, style=wx.FD_OPEN)
            try:
                if file_dir.ShowModal() == wx.ID_OK:
                    target.Clear()
                    target.AppendText("%s" % file_dir.GetPath())

                    wx.MessageBox("文件导入成功!!!")
            finally:
                # Dialogs are not released automatically after ShowModal.
                file_dir.Destroy()
        finally:
            btn.Enable()

    def inputbutton1(self, event):
        """Handle the first import button."""
        self._choose_data_file(event, self.textctrl1)

    def inputbutton2(self, event):
        """Handle the second import button."""
        self._choose_data_file(event, self.textctrl2)
if __name__ == '__main__':
    # Called first so a frozen Windows executable can use multiprocessing.
    multiprocessing.freeze_support()

    application = wx.App()
    # The frame shows itself in its constructor; keep a reference while the
    # event loop runs.
    main_window = Frame()
    application.MainLoop()

config.cfg文件

[PDF_Frame]
PDF_Frame_desc = 该exe根据excel文件中从A列起的各列数据(有效范围共六十列)填写pdf:程序按pdf文件注释中的{1},{2},{3}...依次识别对应excel的A,B,C...等列。ps:请注意excel文件中文本、数值等格式,烦请提前做好格式转换;并请在pdf文件中添加{1},{2}等注释,放到相应位置,且注释位置应放得标准、准确!!!(注意:生成出来的pdf会除去所有注释内容!!!)其中以C列作为生成的文件名。

pdf_model1_filepath = xxxxpdf模板1文件路径

output_filepath1 = 模板一输出路径

pdf_model2_filepath = xxxxpdf模板2文件路径

output_filepath2 = 模板二输出路径

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值