Python PDF文档转图片
"""
将PDF文档每一页提取为图片
"""
import fitz
import os
def mkdir(path):
    """Create the directory ``path``, including any missing parent folders.

    Nothing happens when the directory already exists.

    :param path: directory path to create
    :return: None
    :raises FileExistsError: if ``path`` exists but is not a directory
    """
    # exist_ok=True removes the race between a separate existence check and
    # the creation call (another process may create the folder in between).
    os.makedirs(path, exist_ok=True)
def get_filename(file_path):
    """Return the file name stripped of its directory part and extension.

    :param file_path: path to a file
    :return: last path component with its final extension removed
    """
    base_name = os.path.basename(file_path)
    stem = os.path.splitext(base_name)[0]
    return stem
def pdf_image(pdfPath, imgPath, zoom_x, zoom_y):
    """Render every page of a PDF document to a PNG image.

    The images are written to ``<imgPath>/<PDF name without extension>/`` and
    are named ``1.png``, ``2.png``, ... in page order.

    :param pdfPath: path of the PDF file
    :param imgPath: folder under which the per-document image folder is created
    :param zoom_x: horizontal zoom factor
    :param zoom_y: vertical zoom factor
    :return: None
    """
    # NOTE(review): pageCount / getPixmap / writePNG are the camelCase PyMuPDF
    # names; newer releases spell them page_count / get_pixmap / save —
    # confirm against the installed version.
    pdf = fitz.open(pdfPath)
    try:
        # Join path components instead of concatenating strings so that
        # imgPath works with or without a trailing separator.
        out_dir = os.path.join(imgPath, get_filename(pdfPath))
        mkdir(out_dir)
        # The zoom matrix is the same for every page, so build it once.
        trans = fitz.Matrix(zoom_x, zoom_y)
        for page_num in range(pdf.pageCount):
            pm = pdf[page_num].getPixmap(matrix=trans, alpha=False)
            # Image files are numbered from 1, page indices start at 0.
            pm.writePNG(os.path.join(out_dir, str(page_num + 1) + ".png"))
    finally:
        # Release the document even when rendering or writing a page fails.
        pdf.close()
def get_all_filepath(file_dir=r'pdfs/'):
    """Return the paths of the files located directly inside ``file_dir``.

    Sub-directories are skipped and not descended into.

    :param file_dir: directory to list
    :return: list of ``file_dir`` joined with each file name, sorted by file
        name; an empty list when the directory is missing, cannot be read or
        contains no files
    """
    try:
        names = sorted(os.listdir(file_dir))
    except OSError:
        # Missing or unreadable directory: report "no files" instead of
        # returning None, which callers cannot iterate over.
        return []
    candidates = (os.path.join(file_dir, name) for name in names)
    return [path for path in candidates if not os.path.isdir(path)]
def get_image():
    """Convert every file returned by ``get_all_filepath()`` into page images.

    Each document is rendered with a zoom factor of 3 in both directions and
    its images are stored under ``images/``.

    :return: None
    """
    # ``or []`` keeps the loop safe when the lookup yields None instead of a
    # list (for example when the source folder cannot be walked).
    for file_path in get_all_filepath() or []:
        pdf_image(file_path, r"images/", 3, 3)


# Run the conversion only when executed as a script, not on import.
if __name__ == '__main__':
    get_image()
复制PDF文档并加密
"""
复制PDF文档 PDF文档加密
"""
import PyPDF2
from PyPDF2.pdf import PageObject
# Open the source document and prepare an empty writer for the copy.
reader = PyPDF2.PdfFileReader('resources/XGBoost.pdf')
writer = PyPDF2.PdfFileWriter()

# Copy the pages over one by one, keeping their order.
page_total = reader.getNumPages()
for page_index in range(page_total):
    writer.addPage(reader.getPage(page_index))

# Encrypt the copy with the password before it is written out.
writer.encrypt('123456')

with open('resources/new_XGBoost.pdf', 'wb') as output_stream:
    writer.write(output_stream)
为PDF文档添加水印
"""
PDF文档添加水印
"""
import PyPDF2
from PyPDF2.pdf import PageObject
# The first page of the watermark document is merged into every page below.
wm_page = PyPDF2.PdfFileReader('resources/watermark.pdf').getPage(0)

reader = PyPDF2.PdfFileReader('resources/XGBoost.pdf')
writer = PyPDF2.PdfFileWriter()
for page_index in range(reader.getNumPages()):
    current_page = reader.getPage(page_index)
    # Merge the watermark into the page, then queue the page for output.
    current_page.mergePage(wm_page)
    writer.addPage(current_page)

with open('resources/XGBoost_watermarked.pdf', 'wb') as output_stream:
    writer.write(output_stream)
创建自定义的PDF水印文件
"""
创建自定义的水印PDF文件
"""
from reportlab.lib.pagesizes import A4
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas
# A4 page dimensions; printed so the values can be checked when positioning.
width, height = A4
pdf_canvas = canvas.Canvas('demo.pdf', pagesize=A4)
print(width, height)

# Register the TrueType fonts under the aliases used by setFont below.
for alias, font_file in (
    ('Font1', 'fonts/Action.ttf'),
    ('Font2', 'fonts/青呱石头体.ttf'),
):
    pdfmetrics.registerFont(TTFont(alias, font_file))

# First text: Font2 at size 40, positioned relative to the page centre.
pdf_canvas.setFont('Font2', 40)
pdf_canvas.setFillColorRGB(0.9, 0.5, 0.3, 1)
text_x = width // 2 - 120
text_y = height // 2
pdf_canvas.drawString(text_x, text_y, '你好,世界!')

# Second text: Font1 at size 40, drawn after rotating the canvas.
pdf_canvas.setFont('Font1', 40)
pdf_canvas.setFillColorRGB(0, 1, 0, 0.5)
pdf_canvas.rotate(18)
pdf_canvas.drawString(250, 250, 'hello, world!')

# Write the finished page to demo.pdf.
pdf_canvas.save()