# 一、PDF文件提取文字
import os

import PyPDF2
from PyPDF2.pdf import PageObject
# Open the sample PDF and print the text extracted from its first page
# (page index 0).
pdf_path = r'resources\XGBoost.pdf'
reader = PyPDF2.PdfFileReader(pdf_path)
page = reader.getPage(0)
first_page_text = page.extractText()
print(first_page_text)
# 二、将PDF文件转成PNG图片
def get_filename(file_path):
    """
    Extract the file name, without directory or extension, from a path.

    Only the last extension is stripped, so ``a/b/archive.tar.gz`` yields
    ``archive.tar``. Path separators are interpreted according to the
    platform's ``os.path`` rules.

    :param file_path: path to a file
    :return: the base name of the file with its final extension removed
    """
    # The previous version computed this stem but returned the full name
    # (extension included), contradicting the documented contract.
    stem, _ = os.path.splitext(os.path.basename(file_path))
    return stem
def pdf_image(pdf_file, img_file, zoom_x=10, zoom_y=10, rotation_angle=0):
    """
    Render every page of a PDF file to its own PNG image.

    Each page is written to ``{img_file}{name}_{n}.png``, where ``name`` is
    the value returned by ``get_filename(pdf_file)`` and ``n`` is the
    1-based page number. ``img_file`` is used as a plain string prefix, so
    it must end with a path separator to place the images in a directory.

    :param pdf_file: path of the PDF file to render
    :param img_file: prefix (normally a directory ending in a separator)
        under which the images are saved
    :param zoom_x: horizontal zoom factor
    :param zoom_y: vertical zoom factor
    :param rotation_angle: rotation angle applied to every page
    """
    # NOTE(review): ``fitz`` (PyMuPDF) is not imported in the visible part
    # of this file - confirm it is imported elsewhere.
    # The output name prefix does not depend on the page, so compute it once.
    base_name = get_filename(pdf_file)
    pdf = fitz.open(pdf_file)
    try:
        # The same transform is applied to every page; build it once.
        trans = fitz.Matrix(zoom_x, zoom_y).preRotate(rotation_angle)
        for page_num in range(pdf.pageCount):
            page_obj = pdf[page_num]
            pm = page_obj.getPixmap(matrix=trans, alpha=False)
            pm.writePNG(f'{img_file}{base_name}_{page_num + 1}.png')
    finally:
        # Release the document even if rendering or writing a page fails.
        pdf.close()
# Make sure the output directory exists. exist_ok=True replaces the
# check-then-create pattern, which could fail if the directory appeared
# between the existence check and the makedirs call.
os.makedirs('resources/', exist_ok=True)
# Render every page of the sample PDF at 2x zoom in both directions.
pdf_image(r"resources\XGBoost.pdf", r"resources/", 2, 2)
# 三、在PDF文件中添加水印
import PyPDF2
from PyPDF2.pdf import PageObject
# Take the first page of the watermark PDF; it is merged onto every page of
# the target document below.
watermark_reader = PyPDF2.PdfFileReader(r'resources\watermark.pdf')
wm_page = watermark_reader.getPage(0)

# Read the document to be watermarked and prepare a writer for the result.
reader = PyPDF2.PdfFileReader(r'resources\XGBoost.pdf')
writer = PyPDF2.PdfFileWriter()

total_pages = reader.getNumPages()
for index in range(total_pages):
    current_page = reader.getPage(index)
    # Merge the watermark page into this page, then queue it for output.
    current_page.mergePage(wm_page)
    writer.addPage(current_page)

# Write the watermarked pages out as a new PDF file.
with open(r'resources\XGBoost_watermarked.pdf', 'wb') as output_file:
    writer.write(output_file)
# 四、制作水印
from reportlab.lib.pagesizes import A4
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas
# Draw an A4 PDF with reportlab: an image, then (after showPage) two text
# strings in different fonts and colours, and save it to resources/demo.pdf.
width, height = A4
pdf_canvas = canvas.Canvas('resources/demo.pdf', pagesize=A4)

# Place the picture 20 units from the left edge, 250 wide and 375 tall,
# with its lower edge 395 units below the top of the page.
image = canvas.ImageReader('resources/guido.jpg')
image_y = height - 395
pdf_canvas.drawImage(image, 20, image_y, 250, 375)
# Finish the current page; the drawing below goes onto the next one.
pdf_canvas.showPage()

# Register the two TrueType fonts under the aliases used by setFont below.
for font_alias, font_path in (
    ('Font1', 'resources/fonts/Vera.ttf'),
    ('Font2', 'resources/fonts/青呱石头体.ttf'),
):
    pdfmetrics.registerFont(TTFont(font_alias, font_path))

# Chinese greeting near the middle of the page, fully opaque.
pdf_canvas.setFont('Font2', 40)
pdf_canvas.setFillColorRGB(0.9, 0.5, 0.3, 1)
greeting_x = width // 2 - 120
greeting_y = height // 2
pdf_canvas.drawString(greeting_x, greeting_y, '你好,世界!')

# English greeting in half-transparent green, drawn after rotating the
# canvas by 18 degrees.
pdf_canvas.setFont('Font1', 40)
pdf_canvas.setFillColorRGB(0, 1, 0, 0.5)
pdf_canvas.rotate(18)
pdf_canvas.drawString(250, 250, 'hello, world!')

pdf_canvas.save()