前置条件:模板是使用 Adobe Acrobat DC 编辑的、带有表单域的 PDF 文件
相关库版本如下:
reportlab:3.5.0
PyPDF2:2.10.0
pdfrw:0.4
1.正常填充
使用表单域自带的默认样式;在代码中修改其他样式可能不会生效
import pdfrw
# PDF dictionary keys / values used while walking a page's annotations.
ANNOT_KEY = '/Annots'  # page entry that holds the annotation array
ANNOT_FIELD_KEY = '/T'  # annotation entry read as the form field name
ANNOT_RECT_KEY = '/Rect'  # not referenced by the code visible in this file
SUBTYPE_KEY = '/Subtype'  # annotation entry compared with WIDGET_SUBTYPE_KEY
WIDGET_SUBTYPE_KEY = '/Widget'  # subtype value that marks a form widget
# Fill in a PDF form template and write the result as a new PDF.
def write_fill_able_pdf(template_report_path, data_dict, output_pdf_path):
    """Fill the form fields of a PDF template and save the result.

    Every widget annotation that carries a field name (``/T``) gets its
    field flags set to ``Ff=1`` (the ReadOnly bit in the PDF specification).
    When the field name is a key of ``data_dict`` the field value ``/V`` is
    set as well.

    Args:
        template_report_path: Path of the PDF template to read.
        data_dict: Mapping of field name to value; each value is rendered
            with ``'{}'.format(value)``.
        output_pdf_path: Path the filled PDF is written to.
    """
    template_pdf = pdfrw.PdfReader(template_report_path)

    # A template without an /AcroForm entry yields None here; guard so the
    # function does not fail with AttributeError on such files.
    acro_form = template_pdf.Root.AcroForm
    if acro_form is not None:
        # Ask the viewer to regenerate field appearances so the new values
        # are actually displayed.
        acro_form.update(pdfrw.PdfDict(
            NeedAppearances=pdfrw.PdfObject('true')))

    for page in template_pdf.pages:
        # Pages without annotations return None for /Annots.
        for annotation in page[ANNOT_KEY] or ():
            if annotation[SUBTYPE_KEY] != WIDGET_SUBTYPE_KEY:
                continue
            field_name = annotation[ANNOT_FIELD_KEY]
            if not field_name:
                continue
            # Decode the PDF string properly instead of slicing off the
            # delimiters, so hex-encoded or escaped names also match.
            key = field_name.to_unicode()
            if key in data_dict:
                annotation.update(pdfrw.PdfDict(
                    V='{}'.format(data_dict[key]), Ff=1))
            else:
                annotation.update(pdfrw.PdfDict(Ff=1))

    pdfrw.PdfWriter().write(output_pdf_path, template_pdf)
if __name__ == '__main__':
    # Sample values, keyed by the form field names of the template.
    field_data = {
        'name': "张三",
        'age': "18",
        'birthday': '2000.01.01',
        'gender': "男",
        'telephone': "1234567890",
    }

    # Input template and destination of the filled copy.
    template_pdf = "C:\\Users\\lenovo\\Desktop\\input.pdf"
    output_pdf = "C:\\Users\\lenovo\\Desktop\\output.pdf"

    write_fill_able_pdf(template_pdf, field_data, output_pdf)
2.带样式填充(支持以下自定义项)
1.自定义字体(可多种)
2.自定义大小
3.自定义颜色
import PyPDF2
import pdfrw
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from io import BytesIO
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
# Register a custom TrueType font with reportlab.
# This runs at import time; the font file must exist at FONT_FILE.
FONT_FILE = "./simhei.ttf"  # path of the TrueType font file handed to TTFont
FONT_NAME = "MyFont"  # name under which the font is registered and later used
pdfmetrics.registerFont(TTFont(FONT_NAME, FONT_FILE))
# Collect the positions of the text form fields.
def get_form_fields_coordinates(template_path, temp_path=None):
    """Return the lower-left position of every text field, page by page.

    The template is also rewritten to a temporary PDF in which every text
    field found has its field flags set to ``Ff=1`` (the ReadOnly bit in the
    PDF specification).

    Args:
        template_path: Path of the PDF template to read.
        temp_path: Path the modified template is written to. When omitted,
            the module-level ``temp_pdf`` variable is used, as the original
            script did.

    Returns:
        A list with exactly one dict per page (empty for pages without text
        fields), so that index ``i`` always refers to page ``i``. Each dict
        maps a field name to the ``(x, y)`` pair taken from the first two
        entries of the field's ``/Rect``.
    """
    template_pdf_file = pdfrw.PdfReader(template_path)

    # A template without an /AcroForm entry yields None here; guard so the
    # function does not fail with AttributeError on such files.
    acro_form = template_pdf_file.Root.AcroForm
    if acro_form is not None:
        acro_form.update(pdfrw.PdfDict(
            NeedAppearances=pdfrw.PdfObject('true')))

    fields_page_list = []
    for page in template_pdf_file.pages:
        coordinates = {}
        # Pages without annotations return None for /Annots.
        for annotation in page['/Annots'] or ():
            if annotation.Subtype != '/Widget' or annotation.FT != '/Tx':
                continue
            field_name = annotation['/T']
            if field_name is None:
                # Widget without its own name: nothing to key the position on.
                continue
            rect = annotation['/Rect']
            # Decode the PDF string properly instead of slicing off the
            # delimiters, so hex-encoded or escaped names also work.
            coordinates[field_name.to_unicode()] = (rect[0], rect[1])
            annotation.update(pdfrw.PdfDict(Ff=1))
        # Append for every page, including pages without fields; otherwise
        # the list indices drift away from the page numbers.
        fields_page_list.append(coordinates)

    if temp_path is None:
        # Backward compatibility with callers that rely on the global.
        temp_path = temp_pdf
    pdfrw.PdfWriter().write(temp_path, template_pdf_file)
    return fields_page_list
# Build the overlay PDF that carries the field texts.
def create_overlay_pdf(coordinates, offset, font_size, data=None):
    """Render the values of the given fields into a one-page overlay PDF.

    Args:
        coordinates: Mapping of field name to a position whose first two
            items are x and y (anything ``float()`` accepts).
        offset: Amount added to y before drawing, to lift the text baseline.
        font_size: Font size passed to ``setFont`` together with FONT_NAME.
        data: Mapping of field name to the value to draw. When omitted, the
            module-level ``field_data`` variable is used, as the original
            script did.

    Returns:
        The bytes of the generated PDF.
    """
    if data is None:
        # Backward compatibility with callers that rely on the global.
        data = field_data

    buffer = BytesIO()
    c = canvas.Canvas(buffer, pagesize=letter)
    # Font and fill colour are the same for every field, so set them once.
    c.setFont(FONT_NAME, font_size)
    c.setFillColorRGB(0, 0, 0)

    for field, position in coordinates.items():
        if field not in data:
            continue
        x, y = float(position[0]), float(position[1])
        # str() lets non-string values (numbers, dates) be drawn as well.
        c.drawString(x, y + offset, str(data[field]))

    c.save()
    return buffer.getvalue()
# Merge the text overlays onto the template pages.
def merge_pdf(template_path, output_path, list):
    """Stamp the field texts onto each template page and write the result.

    Args:
        template_path: Path of the PDF whose pages are used as the base.
        output_path: Path the merged PDF is written to.
        list: Per-page field coordinates, where ``list[i]`` is the mapping
            for page ``i``. The name shadows the builtin but is kept because
            it is part of the existing signature. Pages beyond the end of
            the sequence, or with an empty mapping, are copied unchanged.
    """
    with open(template_path, "rb") as f:
        template = PyPDF2.PdfReader(f)
        output = PyPDF2.PdfWriter()

        for i, page in enumerate(template.pages):
            # Tolerate a coordinate list shorter than the page count instead
            # of raising IndexError.
            coordinates = list[i] if i < len(list) else None
            if coordinates:
                overlay_buffer = create_overlay_pdf(coordinates, 4, 10)
                overlay_pdf = PyPDF2.PdfReader(BytesIO(overlay_buffer))
                # Check explicitly rather than swallowing IndexError.
                if len(overlay_pdf.pages) > 0:
                    page.merge_page(overlay_pdf.pages[0])
            output.add_page(page)

        # Write while the template file is still open: the page objects are
        # backed by the open stream.
        with open(output_path, "wb") as out_f:
            output.write(out_f)
if __name__ == '__main__':
    # Sample values, keyed by the form field names of the template.
    # NOTE: create_overlay_pdf reads this module-level name.
    field_data = {
        'name': "张三",
        'age': "18",
        'birthday': '2000.01.01',
        'gender': "男",
        'telephone': "1234567890",
    }

    # Input template, final output, and the intermediate copy.
    # NOTE: get_form_fields_coordinates reads the module-level temp_pdf.
    template_pdf = "C:\\Users\\lenovo\\Desktop\\input.pdf"
    output_pdf = "C:\\Users\\lenovo\\Desktop\\output.pdf"
    temp_pdf = "C:\\Users\\lenovo\\Desktop\\temp.pdf"

    # Step 1: locate the fields and write the intermediate copy.
    fields_list = get_form_fields_coordinates(template_pdf)
    # Step 2: stamp the texts onto that copy.
    merge_pdf(temp_pdf, output_pdf, fields_list)