from PIL import Image
import os
import io
import fitz
import time
def single_pdf_clearwater(pdf_path: str, definition: int = 6, threshold: int = 590) -> None:
    """Strip a light-coloured watermark from a PDF by rasterising its pages.

    Every page is rendered to an RGB bitmap scaled by ``definition`` on both
    axes. Each pixel whose R + G + B is strictly greater than ``threshold``
    is painted pure white. The cleaned bitmaps are then assembled, one per
    page, into a new image-only PDF written to
    ``output/去水印pdf_<base name of pdf_path>`` (the ``output`` directory is
    created when missing).

    Args:
        pdf_path: Path of the PDF to clean.
        definition: Scale factor used for both axes of the render matrix.
        threshold: Channel-sum cut-off; pixels above it are whitened.
    """
    white = b"\xff\xff\xff"
    # The render matrix does not depend on the page, so build it once.
    matrix = fitz.Matrix(definition, definition)

    # Context managers make sure both documents are closed even on failure.
    with fitz.open(pdf_path) as pdf, fitz.open() as pdf_img:
        for page_inf in pdf:
            png_bytes = page_inf.get_pixmap(matrix=matrix).tobytes()

            # Force 3-byte RGB pixels so the stride used below and the
            # csRGB pixmap rebuilt afterwards always match the buffer.
            img = Image.open(io.BytesIO(png_bytes)).convert("RGB")
            width, height = img.size

            # Work on the raw buffer in one pass instead of calling
            # getpixel/putpixel for every single pixel.
            samples = bytearray(img.tobytes())
            for offset in range(0, len(samples), 3):
                channel_sum = (
                    samples[offset] + samples[offset + 1] + samples[offset + 2]
                )
                if channel_sum > threshold:
                    samples[offset:offset + 3] = white

            # alpha=False: the buffer holds plain RGB triples, no alpha byte.
            cleaned = fitz.Pixmap(fitz.csRGB, width, height, bytes(samples), False)

            # Wrap the cleaned image in a one-page PDF and append it.
            with fitz.open("png", cleaned.tobytes()) as img_doc:
                pdf_bytes = img_doc.convert_to_pdf()
            with fitz.open("pdf", pdf_bytes) as page_doc:
                pdf_img.insert_pdf(page_doc)

        os.makedirs("output", exist_ok=True)
        pdf_img.save("output/去水印pdf_" + os.path.basename(pdf_path))
def group_pdf_clearwater(path_array):
    """Run the watermark removal on each PDF path in ``path_array``.

    Prints a banner once, then for every path prints a progress line, calls
    ``single_pdf_clearwater`` on it and prints a completion line.
    """
    print("************去水印时间比较久***********")
    for current_pdf in path_array:
        # Announce the file before the (slow) processing starts.
        print(current_pdf, "去水印中...")
        single_pdf_clearwater(current_pdf)
        print("完成")
def folder_pdf_files(file_path: str):
    """Return a one-element list containing ``file_path``.

    Also prints the path together with the number of entries in the list.
    """
    collected = [file_path]
    count = len(collected)
    print(file_path, ": 有", count, "个pdf文件")
    return collected
if __name__ == "__main__":
    # Script entry point: clean the configured PDF and report the wall time.
    time_start = time.time()

    path_list = folder_pdf_files("pdf文件名")
    print(path_list)
    group_pdf_clearwater(path_list)

    elapsed = round(time.time() - time_start, 2)
    print("程序运行时间:", elapsed, "秒")
from PyPDF2 import PdfFileReader, PdfFileWriter
def split_pdf(start_page, end_page):
    """Interactively copy a range of pages from a PDF into a new file.

    Prompts for the source file name, prints its page count, then copies the
    pages with 0-based indices ``start_page`` .. ``end_page - 1`` (the end is
    exclusive, as in ``range``) into ``<root>_<start_page>-<end_page><ext>``,
    where ``<root>`` and ``<ext>`` come from the entered name. Any error is
    printed instead of being raised, so the function always returns ``None``.

    Args:
        start_page: 0-based index of the first page to copy.
        end_page: 0-based index one past the last page to copy.
    """
    try:
        read_file = input("请输入要拆分的PDF名字(例如test.pdf):")
        # Keep the source open until the output is written: the reader may
        # still pull page data from the stream while the writer serialises.
        with open(read_file, 'rb') as fp_read_file:
            pdf_input = PdfFileReader(fp_read_file)
            page_count = pdf_input.getNumPages()
            print("该文件共有{}页".format(page_count))

            # Fail early with a readable message rather than an opaque
            # index error from deep inside the PDF library.
            if not 0 <= start_page <= end_page <= page_count:
                raise ValueError(
                    f"页码范围无效: {start_page}-{end_page} (该文件共有{page_count}页)"
                )

            # splitext copes with names that contain several dots, no
            # extension at all, or a directory part such as "./a/b.pdf".
            root, ext = os.path.splitext(read_file)
            pdf_file = f"{root}_{start_page}-{end_page}{ext}"

            print(f'开始分割{start_page}页-{end_page}页,保存为{pdf_file}......')
            pdf_output = PdfFileWriter()
            for i in range(start_page, end_page):
                pdf_output.addPage(pdf_input.getPage(i))
            with open(pdf_file, 'wb') as sub_fp:
                pdf_output.write(sub_fp)
    except Exception as e:
        # Best-effort interactive tool: report the problem and return.
        print(e)
if __name__ == "__main__":
    # Script entry point: run the interactive splitter with a fixed range.
    split_pdf(start_page=12, end_page=17)