用 Python 填充 Adobe Acrobat DC 制作的 PDF 表单

少为人

已于 2023-07-28 16:12:19 修改

阅读量500

点赞数

分类专栏： python 文章标签： python pdf 前端

于 2023-07-28 16:09:45 首次发布

本文链接：https://blog.csdn.net/weixin_50025568/article/details/131981872

版权

python 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

前置条件：模板为使用Adobe Acrobat DC编辑的有表单的pdf
相关库版本如下：
reportlab：3.5.0
PyPDF2：2.10.0
pdfrw：0.4

1.正常填充

使用表单字段的默认样式；用这种方式修改字体、颜色等其他样式可能不会生效。

import pdfrw

# PDF dictionary names used while walking the annotations of each page.

# Page-level entry that holds the list of annotations.
ANNOT_KEY = "/Annots"

# Entries read from (or compared against) an individual annotation.
SUBTYPE_KEY = "/Subtype"
WIDGET_SUBTYPE_KEY = "/Widget"
ANNOT_FIELD_KEY = "/T"
ANNOT_RECT_KEY = "/Rect"  # not referenced by the function below; kept for callers


# Fill a PDF form template and write the result to a new PDF.
def write_fill_able_pdf(template_report_path, data_dict, output_pdf_path):
    """Fill the form fields of a template PDF and save the result.

    Every widget annotation that carries a field name (``/T``) is marked
    read-only. When that name is a key of ``data_dict`` the field value
    (``/V``) is set to the string form of the mapped value.

    Args:
        template_report_path: Path of the fillable template PDF.
        data_dict: Mapping of field name -> value to write.
        output_pdf_path: Path the filled PDF is written to.
    """
    read_only_flag = 1  # bit 1 of the /Ff field flags (ReadOnly)

    template_pdf = pdfrw.PdfReader(template_report_path)

    # Ask the viewer to regenerate field appearances. A PDF without an
    # AcroForm dictionary has nothing to flag, so skip instead of crashing.
    acro_form = template_pdf.Root.AcroForm
    if acro_form is not None:
        acro_form.update(pdfrw.PdfDict(
            NeedAppearances=pdfrw.PdfObject('true')))

    for page in template_pdf.pages:
        annotations = page[ANNOT_KEY]
        if annotations is None:
            continue
        for annotation in annotations:
            if annotation[SUBTYPE_KEY] != WIDGET_SUBTYPE_KEY:
                continue
            raw_name = annotation[ANNOT_FIELD_KEY]
            if not raw_name:
                continue

            # Decode the PDF string properly (handles escapes, hex and
            # UTF-16 encoded names); fall back to stripping the delimiters
            # for objects that do not offer a decoder.
            try:
                key = raw_name.to_unicode()
            except AttributeError:
                key = raw_name[1:-1]

            # Add the read-only bit while keeping flags the template already
            # set (multiline, radio, ...), instead of overwriting them.
            existing_flags = annotation['/Ff']
            flags = (int(existing_flags) if existing_flags else 0) | read_only_flag

            if key in data_dict:
                annotation.update(
                    pdfrw.PdfDict(V=str(data_dict[key]), Ff=flags))
            else:
                annotation.update(pdfrw.PdfDict(Ff=flags))

    pdfrw.PdfWriter().write(output_pdf_path, template_pdf)


if __name__ == '__main__':
    # Example input/output locations on the author's machine.
    template_pdf = "C:\\Users\\lenovo\\Desktop\\input.pdf"
    output_pdf = "C:\\Users\\lenovo\\Desktop\\output.pdf"

    # Sample values; each key is looked up against a form field name.
    field_data = {
        "name": "张三",
        "age": "18",
        "birthday": '2000.01.01',
        "gender": "男",
        "telephone": "1234567890",
    }

    write_fill_able_pdf(
        template_report_path=template_pdf,
        data_dict=field_data,
        output_pdf_path=output_pdf,
    )

2.带样式填充

1.自定义字体（可多种）
2.自定义大小
3.自定义颜色

import PyPDF2
import pdfrw
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from io import BytesIO
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont

# Register the custom font.
# FONT_FILE is the TrueType file to load (path relative to the working
# directory); FONT_NAME is the name it is registered under and the name the
# drawing code passes to setFont().
FONT_FILE = "./simhei.ttf"
FONT_NAME = "MyFont"
# Runs at import time, so the font file must exist when this module is loaded.
pdfmetrics.registerFont(TTFont(FONT_NAME, FONT_FILE))

# Collect the positions of the form's text fields.
def get_form_fields_coordinates(template_path, temp_path=None):
    """Return text-field positions per page and write a locked copy.

    Reads the template, records the lower-left corner of every text field
    (widget annotation with ``/FT`` equal to ``/Tx``), marks the widgets
    read-only and writes the modified document to ``temp_path``.

    Args:
        template_path: Path of the fillable template PDF.
        temp_path: Where the modified copy is written. When omitted, the
            module-level ``temp_pdf`` variable is used, as the original
            script did.

    Returns:
        A list with exactly one dict per page, in page order. Each dict maps
        a field name to the ``(x, y)`` pair taken from the first two entries
        of the field's ``/Rect``. Pages without text fields yield an empty
        dict so that list indices always equal page indices.
    """
    if temp_path is None:
        # Backward compatibility: fall back to the script-level global.
        temp_path = temp_pdf

    read_only_flag = 1  # bit 1 of the /Ff field flags (ReadOnly)

    template_pdf_file = pdfrw.PdfReader(template_path)

    # A PDF without an AcroForm dictionary has nothing to flag.
    acro_form = template_pdf_file.Root.AcroForm
    if acro_form is not None:
        acro_form.update(pdfrw.PdfDict(
            NeedAppearances=pdfrw.PdfObject('true')))

    fields_page_list = []
    for page in template_pdf_file.pages:
        coordinates = {}
        for annotation in page['/Annots'] or []:
            if annotation.Subtype != '/Widget':
                continue

            raw_name = annotation['/T']
            if annotation.FT == '/Tx' and raw_name:
                # Decode the PDF string properly; fall back to stripping the
                # delimiters for objects that do not offer a decoder.
                try:
                    key = raw_name.to_unicode()
                except AttributeError:
                    key = raw_name[1:-1]
                rect = annotation['/Rect']
                coordinates[key] = (rect[0], rect[1])

            # Add the read-only bit without discarding existing field flags.
            existing_flags = annotation['/Ff']
            flags = (int(existing_flags) if existing_flags else 0) | read_only_flag
            annotation.update(pdfrw.PdfDict(Ff=flags))

        # Always append, even for pages without annotations: the caller
        # indexes this list by page number.
        fields_page_list.append(coordinates)

    pdfrw.PdfWriter().write(temp_path, template_pdf_file)
    return fields_page_list

# Build the overlay PDF that carries the styled text.
def create_overlay_pdf(coordinates, offset, font_size, data=None):
    """Render field values at their positions and return the PDF bytes.

    Args:
        coordinates: Mapping of field name -> ``(x, y)`` position; the
            components may be numbers or numeric strings.
        offset: Amount added to ``y`` before drawing each string.
        font_size: Size passed to ``setFont`` for the registered font.
        data: Mapping of field name -> value to draw. When omitted, the
            module-level ``field_data`` variable is used, as the original
            script did.

    Returns:
        The generated PDF document as ``bytes``.
    """
    if data is None:
        # Backward compatibility: fall back to the script-level global.
        data = field_data

    buffer = BytesIO()
    c = canvas.Canvas(buffer, pagesize=letter)
    for field, position in coordinates.items():
        if field not in data:
            continue
        # Font and colour are set per drawn field (not hoisted) so that a
        # page with no matching field issues no canvas operations at all.
        c.setFont(FONT_NAME, font_size)
        c.setFillColorRGB(0, 0, 0)
        x, y = float(position[0]), float(position[1])
        # str() lets callers supply numbers or other non-string values.
        c.drawString(x, y + offset, str(data[field]))

    c.save()
    return buffer.getvalue()

# Merge the text overlays onto the pages of the template.
def merge_pdf(template_path, output_path, list, offset=4, font_size=10):
    """Stamp the per-page overlays onto the template and write the result.

    Args:
        template_path: Path of the PDF whose pages receive the overlays.
        output_path: Path the merged PDF is written to.
        list: Per-page field coordinates, indexed by page number (one dict
            of field name -> ``(x, y)`` per page). The parameter name is
            kept for backward compatibility although it shadows the builtin.
        offset: Vertical offset forwarded to ``create_overlay_pdf``.
        font_size: Font size forwarded to ``create_overlay_pdf``.
    """
    page_fields = list  # local alias so the builtin name is not used below

    with open(template_path, "rb") as f:
        # PdfReader / PdfWriter / .pages / add_page are the non-deprecated
        # names, matching the merge_page call already used here.
        template = PyPDF2.PdfReader(f)
        output = PyPDF2.PdfWriter()

        for i, page in enumerate(template.pages):
            # Tolerate a coordinate list shorter than the document.
            coordinates = page_fields[i] if i < len(page_fields) else {}

            if coordinates:
                overlay_bytes = create_overlay_pdf(
                    coordinates, offset, font_size)
                overlay_pdf = PyPDF2.PdfReader(BytesIO(overlay_bytes))
                # The overlay has no page when nothing was drawn on it; only
                # that lookup is guarded so other errors are not hidden.
                try:
                    overlay_page = overlay_pdf.pages[0]
                except IndexError:
                    overlay_page = None
                if overlay_page is not None:
                    page.merge_page(overlay_page)

            output.add_page(page)

        # Written while the template file is still open.
        with open(output_path, "wb") as out_f:
            output.write(out_f)


if __name__ == '__main__':
    # Example locations on the author's machine: the template, the final
    # output, and the intermediate copy written before merging.
    template_pdf = "C:\\Users\\lenovo\\Desktop\\input.pdf"
    output_pdf = "C:\\Users\\lenovo\\Desktop\\output.pdf"
    temp_pdf = "C:\\Users\\lenovo\\Desktop\\temp.pdf"

    # Sample values; each key is looked up against a form field name.
    field_data = {
        "name": "张三",
        "age": "18",
        "birthday": '2000.01.01',
        "gender": "男",
        "telephone": "1234567890",
    }

    # Step 1: collect field positions (also writes the intermediate copy).
    fields_list = get_form_fields_coordinates(template_pdf)
    # Step 2: draw the values onto the intermediate copy.
    merge_pdf(template_path=temp_pdf, output_path=output_pdf, list=fields_list)