需求:根据pdf模板批量生成pdf文件,并把数据填入模板中的指定位置。数据以excel格式(xlsx,xls)提供,
程序以图形界面的形式运行。
需要库:PyPDF2,xlrd,reportlab,wxpython,configparser,pywin32(界面文件中用到win32con和win32comext)。
思路:主要利用了pdf文档的注释文本功能,读取pdf特定的文本注释框文本及位置信息,为pdf模板定位需要写入的内容位置。
不多说,直接上代码。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2020/4/28 16:51
# @Author : Alfie Liang
# @Site :
# @File : PDF_Reminder.py
# @Software: PyCharm
import configparser
import io
import os
import re
import traceback
from threading import Thread
# from multiprocessing import Process

import wx
import xlrd
from PyPDF2 import PdfFileWriter, PdfFileReader
from PyPDF2.generic import Destination
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from wx import MessageBox
class config:
    """Loader for the application's INI-style settings file."""

    def config(self):
        """Read ``.\\config.cfg`` (relative to the working directory).

        The file is decoded as UTF-8 first (a leading BOM is tolerated); if
        that fails, it is re-read with the platform default encoding, which
        is what the previous implementation always used.

        Returns:
            A ``configparser.ConfigParser`` holding the file's contents. The
            parser is empty when the file does not exist, because
            ``ConfigParser.read`` ignores missing files. ``None`` is returned
            when reading fails; the traceback is shown in a message box first.
        """
        CONFIG_FILE = r".\config.cfg"
        try:
            parser = configparser.ConfigParser()
            try:
                parser.read(CONFIG_FILE, encoding="utf-8-sig")
            except UnicodeDecodeError:
                # Start from a clean parser so a half-parsed UTF-8 attempt
                # cannot leak into the result.
                parser = configparser.ConfigParser()
                parser.read(CONFIG_FILE)
            return parser
        except Exception:
            MessageBox("{}".format(traceback.format_exc()))
            return None
class PDF_Reminder():
    """Fill PDF templates with Excel rows, writing one output PDF per row.

    A template marks each insertion point with a text annotation whose
    contents are ``{n}``; ``n`` is the 1-based number of the Excel column
    whose value is drawn at the lower-left corner of that annotation.
    """

    # Highest column number accepted in a "{n}" placeholder annotation.
    MAX_COLUMNS = 60
    # Placeholder text -> zero-based index into an Excel row.
    _PLACEHOLDERS = {"{%d}" % n: n - 1 for n in range(1, MAX_COLUMNS + 1)}
    # Zero-based index of the column (column C) that names each output file.
    FILENAME_COLUMN = 2

    def __init__(self, pdf_filepath1, pdf_filepath2, event):
        """Remember the two Excel data file paths and the triggering button.

        Args:
            pdf_filepath1: Path of the Excel workbook for template 1, or ''.
            pdf_filepath2: Path of the Excel workbook for template 2, or ''.
            event: The wx event of the button that started the export; its
                event object is disabled while an export is running.
        """
        self.btn = event.GetEventObject()
        self.pdf_filepath1 = pdf_filepath1
        self.pdf_filepath2 = pdf_filepath2
        self.config = config().config()
        self.outputfilepath1 = ''
        self.outputfilepath2 = ''

    @staticmethod
    def _call_in_gui(func, *args):
        """Queue ``func(*args)`` on the wx event loop.

        write_word_for_PDF runs on worker threads, and wx widgets must only
        be touched from the GUI thread, so GUI calls go through wx.CallAfter.
        """
        wx.CallAfter(func, *args)

    def _collect_placeholders(self, template_bytes):
        """Return ``{page_number: {placeholder: [[x, y], ...]}}`` for a template.

        Only annotations whose ``/Contents`` is exactly one of the accepted
        ``{n}`` strings are kept; the position is the first two numbers of
        the annotation's ``/Rect``.
        """
        reader = PdfFileReader(io.BytesIO(template_bytes))
        positions = {}
        for page_number in range(reader.getNumPages()):
            page = reader.getPage(page_number)
            try:
                if "/Annots" not in page:
                    continue
                for annot_ref in page["/Annots"]:
                    annot = annot_ref.getObject()
                    if "/Contents" not in annot:
                        continue
                    label = annot["/Contents"]
                    if not isinstance(label, str) or label not in self._PLACEHOLDERS:
                        continue
                    page_slots = positions.setdefault(page_number, {})
                    page_slots.setdefault(label, []).append(annot["/Rect"][:2])
            except Exception:
                # Best effort: report the broken page and keep scanning.
                self._call_in_gui(
                    MessageBox, "出现错误:{}".format(traceback.format_exc()))
                print(traceback.format_exc())
        return positions

    def _render_overlay(self, page_placeholders, row):
        """Draw one row's values at a page's placeholder positions.

        Returns a PdfFileReader over a single-page PDF holding only the text.
        NOTE(review): text is drawn with the canvas's default font; non-Latin
        cell values may need a registered font -- confirm with real data.
        """
        packet = io.BytesIO()
        can = canvas.Canvas(packet, pagesize=letter)
        for label, corners in page_placeholders.items():
            text = "{}".format(row[self._PLACEHOLDERS[label]])
            for x, y in corners:
                can.drawString(float(x), float(y), text)
        can.save()
        packet.seek(0)
        return PdfFileReader(packet)

    def _write_row_pdf(self, template_bytes, placeholders, row, outputfilepath):
        """Merge one row's overlays into a fresh template copy and save it."""
        # Re-parse the template for every row: mergePage mutates the page.
        template = PdfFileReader(io.BytesIO(template_bytes))
        output = PdfFileWriter()
        for page_number in range(template.getNumPages()):
            page = template.getPage(page_number)
            if page_number in placeholders:
                # The overlay is rebuilt per row so each PDF gets its own data.
                overlay = self._render_overlay(placeholders[page_number], row)
                page.mergePage(overlay.getPage(0))
            output.addPage(page)
        # Strip links/annotations so the "{n}" markers do not appear.
        output.removeLinks()
        target = os.path.join(
            outputfilepath, "{}.pdf".format(row[self.FILENAME_COLUMN]))
        with open(target, "wb") as output_stream:
            output.write(output_stream)

    def write_word_for_PDF(self, pdf_model1_filepath, outputfilepath,
                           data_filepath=None):
        """Generate one filled PDF per data row of an Excel workbook.

        The first sheet is used; its first row is skipped as a header. The
        button is disabled while the export runs and re-enabled afterwards;
        the success message is only shown when no error occurred.

        Args:
            pdf_model1_filepath: Path of the annotated PDF template.
            outputfilepath: Directory that receives the generated PDFs.
            data_filepath: Path of the Excel workbook to read. Defaults to
                ``self.pdf_filepath1`` (the previous, fixed behaviour).
        """
        if data_filepath is None:
            data_filepath = self.pdf_filepath1
        self._call_in_gui(self.btn.Disable)
        try:
            with open(pdf_model1_filepath, "rb") as template_file:
                template_bytes = template_file.read()
            placeholders = self._collect_placeholders(template_bytes)
            sheet = xlrd.open_workbook(r"{}".format(data_filepath)).sheet_by_index(0)
            for row_index in range(1, sheet.nrows):
                self._write_row_pdf(template_bytes, placeholders,
                                    sheet.row_values(row_index), outputfilepath)
        except Exception:
            outcome = "出现错误:{}".format(traceback.format_exc())
        else:
            outcome = "导出成功请到相应文件路径:{}检查pdf文件。".format(outputfilepath)
        self._call_in_gui(self.btn.Enable)
        self._call_in_gui(MessageBox, outcome)

    def run(self):
        """Start one worker thread per supplied Excel file.

        Template and output paths come from the ``PDF_Frame`` section of the
        configuration. When neither data file was chosen, the user is asked
        to import one. The threads are not joined.
        """
        jobs = (
            (self.pdf_filepath1, "pdf_model1_filepath", "output_filepath1"),
            (self.pdf_filepath2, "pdf_model2_filepath", "output_filepath2"),
        )
        try:
            pending = [job for job in jobs if job[0] != '']
            if not pending:
                MessageBox("烦请导入数据文件!")
                return
            for data_filepath, model_key, output_key in pending:
                worker = Thread(target=self.write_word_for_PDF, args=(
                    self.config.get("PDF_Frame", model_key),
                    self.config.get("PDF_Frame", output_key),
                    data_filepath))
                worker.start()
        except Exception:
            print(traceback.format_exc())
            MessageBox("出现错误:{}".format(traceback.format_exc()))
界面文件
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2020/4/28 16:49
# @Author : Alfie Liang
# @Site :
# @File : Frame.py
# @Software: PyCharm
import wx
import multiprocessing
import win32con
from win32comext.shell.shell import ShellExecuteEx
from win32comext.shell import shellcon
from PDF_Reminder import PDF_Reminder
class Frame(wx.Frame):
    """Fixed-size main window: a notebook plus a File menu with 'setting'."""

    def __init__(self):
        """Build the window, its notebook and its menu bar, then show it."""
        wx.Frame.__init__(
            self, None, -1, 'PDF Transfer ', size=(560, 290),
            style=wx.CAPTION | wx.MINIMIZE_BOX | wx.CLOSE_BOX | wx.SYSTEM_MENU)

        # The notebook fills a panel that covers the whole frame.
        self.panel = wx.Panel(self, -1)
        self.mnotebook = MyNotebook(self.panel)
        self.mnotebook.Show()
        sizer = wx.BoxSizer(wx.VERTICAL)
        sizer.Add(self.mnotebook, 1, wx.EXPAND)
        self.panel.SetSizer(sizer)

        # File menu with a single entry (id 2) handled by menuHandler.
        file_menu = wx.Menu()
        setting_item = wx.MenuItem(
            file_menu, id=2, text='setting', kind=wx.ITEM_NORMAL)
        file_menu.Append(setting_item)
        menu_bar = wx.MenuBar()
        menu_bar.Append(file_menu, "&File")
        self.Bind(wx.EVT_MENU, self.menuHandler)
        self.SetMenuBar(menu_bar)

        self.Show()
        self.Center()

    def menuHandler(self, event):
        """Open ``.\\config.cfg`` via the shell when 'setting' (id 2) is chosen."""
        if event.GetId() != 2:
            return
        ShellExecuteEx(
            nShow=win32con.SW_SHOWNORMAL,
            fMask=shellcon.SEE_MASK_NOCLOSEPROCESS,
            lpVerb='open',
            lpFile=r".\config.cfg",
            lpParameters='')
class MyNotebook(wx.Notebook):
    """Notebook holding the single "PDF_Transfer" page."""

    def __init__(self, parent):
        """Create the notebook inside *parent* and add its only page."""
        self.parent = parent
        super().__init__(parent)
        page = MyPage1(self)
        self.page1 = page
        self.AddPage(page, "PDF_Transfer")
class MyPage1(wx.Panel):
    """Notebook page with two Excel-file pickers and the Generate button."""

    # File-type filter offered by both import dialogs.
    _EXCEL_WILDCARD = (
        "EXCEL文件(*.xlsx;*.xls;*.xlsm)|*.xlsx;*.xls;*.xlsm|"
        "EXCEL工作簿(*.xlsx)|*.xlsx|"
        "EXCEL工作簿(*.xls)|*.xls|"
        "启用宏的EXCEL工作簿(*.xlsm)|*.xlsm"
    )

    def __init__(self, parent):
        """Create the widgets at fixed positions and wire up their handlers."""
        wx.Panel.__init__(self, parent)
        panel = wx.Panel(self)
        panel.SetBackgroundColour("white")

        # Row 1: label, read-only path box, import button.
        wx.StaticText(self, -1, "导入pdf模板文件路径", pos=(70, 25), style=wx.NO_BORDER)
        self.textctrl1 = wx.TextCtrl(self, -1, u'', size=(300, 20), pos=(70, 50),
                                     style=wx.TE_READONLY | wx.TE_MULTILINE)
        self.import_bt = wx.Button(self, -1, "import", pos=(400, 50), size=(70, 20))

        # Row 2: same layout for the second data file.
        wx.StaticText(self, -1, "导入pdf模板文件路径", pos=(70, 90), style=wx.NO_BORDER)
        self.textctrl2 = wx.TextCtrl(self, -1, u'', size=(300, 20), pos=(70, 120),
                                     style=wx.TE_READONLY | wx.TE_MULTILINE)
        self.import_bt2 = wx.Button(self, -1, "import", pos=(400, 120), size=(70, 20))

        self.run_bt = wx.Button(self, -1, "Generate", pos=(220, 160), size=(70, 30))

        self.import_bt.Bind(wx.EVT_BUTTON, self.inputbutton1)
        self.import_bt2.Bind(wx.EVT_BUTTON, self.inputbutton2)
        # Double-clicking a path box clears it.
        self.textctrl1.Bind(wx.EVT_LEFT_DCLICK, self.cleaninput1file)
        self.textctrl2.Bind(wx.EVT_LEFT_DCLICK, self.cleaninput2file)
        self.run_bt.Bind(wx.EVT_BUTTON, self.run_bt_fun)
        self.Show()
        self.Center()

    def cleaninput1file(self, event):
        """Clear the first path box."""
        self.textctrl1.Clear()

    def cleaninput2file(self, event):
        """Clear the second path box."""
        self.textctrl2.Clear()

    def run_bt_fun(self, evnet):
        """Start the export using the paths currently shown in both boxes."""
        pdf_filepath1 = self.textctrl1.GetValue()
        pdf_filepath2 = self.textctrl2.GetValue()
        PDF_Reminder(pdf_filepath1, pdf_filepath2, evnet).run()

    def _import_file(self, event, target):
        """Let the user pick an Excel file and show its path in *target*.

        The triggering button is disabled while the dialog is open and is
        always re-enabled, even if the dialog raises. The dialog is used as
        a context manager so it is destroyed when done.
        """
        button = event.GetEventObject()
        button.Disable()
        try:
            with wx.FileDialog(None, message="选择单个文件",
                               wildcard=self._EXCEL_WILDCARD,
                               style=wx.FD_OPEN) as dialog:
                if dialog.ShowModal() == wx.ID_OK:
                    target.Clear()
                    target.AppendText("%s" % dialog.GetPath())
                    wx.MessageBox("文件导入成功!!!")
        finally:
            button.Enable()

    def inputbutton1(self, event):
        """Handle the first import button."""
        self._import_file(event, self.textctrl1)

    def inputbutton2(self, event):
        """Handle the second import button."""
        self._import_file(event, self.textctrl2)
if __name__ == '__main__':
    # Must run first so a frozen (exe) build can spawn child processes.
    multiprocessing.freeze_support()
    app = wx.App()
    # Keep a reference to the window for the lifetime of the event loop.
    main_window = Frame()
    app.MainLoop()
config.cfg文件
[PDF_Frame]
PDF_Frame_desc = 该exe可以根据excel文件中A-...(共六十列有效范围),且根据pdf文件中注释中的{1},{2},{3}....识别对应excel的A,B,C...等等列,ps:注意excel文件中文本数值格式等,烦请先提前做好格式转换;并且到pdf文件中标注{1},{2}等注释,放到相应位置,且----!!!!位置应放得更标准,准确!!!!----,(----------------!!!!!!!!!!!!!!!!!!!生成出来的pdf会除去所有注释内容!!!!!!!!!!!!。。)。其中以C列为生成的文件名。
pdf_model1_filepath = xxxxpdf模板1文件路径
output_filepath1 = 模板一输出路径
pdf_model2_filepath = xxxxpdf模板2文件路径
output_filepath2 = 模板二输出路径