14-操作PDF文件
例子1:读取PDF,抽取文字,旋转页面并添加空白页
首先安装PyPDF2
pip install PyPDF2
import PyPDF2
from PyPDF2 import PageObject
# Example 1: copy every page of the source PDF into a new document,
# rotating each page, then append a blank page and save the result.
pdf_reader = PyPDF2.PdfReader("resources/XGBooster.pdf")
pdf_writer = PyPDF2.PdfWriter()

for page in pdf_reader.pages:
    # To read the text of a page instead: print(page.extract_text())
    page.rotate(90)  # rotate the page by 90 degrees
    pdf_writer.add_page(page)  # add the rotated page to the writer

# NOTE(review): the source lost its indentation; the blank page is assumed
# to be appended once after all pages have been copied - confirm.
pdf_writer.add_blank_page()

# Save the assembled document.
with open("resources/XGBooster_fixed.pdf", "wb") as out_file:
    pdf_writer.write(out_file)
例子2:对pdf加密
import PyPDF2
# Example 2: copy all pages into a new document and protect it with a password.
pdf_reader = PyPDF2.PdfReader("resources/XGBooster.pdf")
pdf_writer = PyPDF2.PdfWriter()

for page in pdf_reader.pages:
    pdf_writer.add_page(page)

# Encrypt the whole output document with this password.
pdf_writer.encrypt("abc@1234")

with open("resources/XGBooster-encrypt.pdf", "wb") as out_file:
    pdf_writer.write(out_file)
例子3:加水印
水印的PDF可以用斜着的文本框现场做一个
import PyPDF2
# Example 3: stamp the first page of the watermark PDF onto every page
# of the source PDF and save the result as a new file.
reader1 = PyPDF2.PdfReader("resources/XGBooster.pdf")
reader2 = PyPDF2.PdfReader("resources/kayotin_water.pdf")
writer = PyPDF2.PdfWriter()

# The watermark is the first page of the second document.
water_mark = reader2.pages[0]

for page in reader1.pages:
    page.merge_page(water_mark)  # merge the watermark page into this page
    writer.add_page(page)

with open("resources/Boost_water.pdf", "wb") as out_file:
    writer.write(out_file)
读取文件夹中的文件
import os
# Print every entry of the "resources" folder together with two flags:
# whether it is a directory and whether it is a regular file.
for entry in os.listdir("resources"):
    entry_path = f"resources/{entry}"
    print(entry, os.path.isdir(entry_path), os.path.isfile(entry_path))
例子4:创建PDF文档,插入图片
用到reportlab这个库
from reportlab.lib.pagesizes import A4
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas
# Example 4: build a PDF with an image on the first page and
# rotated red Chinese text on the second page.
pdf_canvas = canvas.Canvas("resources/demo.pdf", pagesize=A4)
width, height = A4

# Draw the picture: image object, x position, y position, then the
# drawn width and height of the picture.
pdf_canvas.drawImage(
    canvas.ImageReader("resources/emp_pic.png"),
    20,
    height - 200,
    width=400,
    height=200,
)

# Finish the current page; everything drawn afterwards goes on a new page.
pdf_canvas.showPage()

# Register a TrueType font file; without it Chinese text cannot be written.
pdfmetrics.registerFont(TTFont("Font1", "resources/font/msyh.ttc"))

# Text settings: font and size, then fill colour as RGB plus alpha.
pdf_canvas.setFont("Font1", 40)
pdf_canvas.setFillColorRGB(1, 0, 0, alpha=1)
pdf_canvas.rotate(18)  # rotate the canvas; counter-clockwise here

text_x = width // 2 - 120
text_y = height // 2
pdf_canvas.drawString(text_x, text_y, "你好,世界")

# Write the document to disk.
pdf_canvas.save()