1. 安装PyPDF2
pip install PyPDF2
2. pdf合并
import os
from PyPDF2 import PdfMerger
from datetime import datetime
"""
介绍:
将 merge_input_folder 文件夹下的所有pdf文件合并成为一个pdf文件
使用方法:
1.首先在本目录下新建一个 merge_input_folder 文件夹
2.将需要合并的pdf文件放入到其中(注意顺序)
3.修改 output_file_name 变量的值
4.运行代码
结果:
合并后的文件在 merge_output_folder 目录下
"""
# Directory containing this script. abspath() guards against a relative
# __file__ (e.g. "script.py" when run as a script on Python < 3.9): its
# dirname would be "", and cur_path + "/merge_input_folder" would then
# point at the filesystem root instead of the script's directory.
cur_path = os.path.dirname(os.path.abspath(__file__))
def merge_pdfs(input_folder, output_pdf_path):
    """Merge every PDF found under ``input_folder`` into a single PDF file.

    The folder is walked recursively. Sub-directories and file names are
    visited in sorted (lexicographic) order so the page order of the merged
    document is deterministic instead of depending on the order in which the
    operating system lists directory entries. Note that lexicographic order
    puts "10.pdf" before "2.pdf"; zero-pad numeric prefixes if needed.

    Args:
        input_folder: Directory searched (recursively) for PDF files.
        output_pdf_path: Path of the merged PDF to create or overwrite.
    """
    merger = PdfMerger()
    try:
        for root, dirs, files in os.walk(input_folder):
            # Sorting in place makes os.walk descend in a stable order.
            dirs.sort()
            for filename in sorted(files):
                # Case-insensitive match so "A.PDF" is not silently skipped.
                if filename.lower().endswith(".pdf"):
                    merger.append(os.path.join(root, filename))
        with open(output_pdf_path, "wb") as output_file:
            merger.write(output_file)
    finally:
        # Release the input file handles the merger keeps open.
        merger.close()
    print(f"Merged PDF saved at {output_pdf_path}")
def get_output_file_name(output_file_name):
    """Build the timestamped output path and make sure its folder exists.

    The file is placed in the ``merge_output_folder`` directory under
    ``cur_path`` and is named ``<YYYY_mm_dd_HH_MM_SS>_<output_file_name>.pdf``
    using the current local time.

    Args:
        output_file_name: Base name of the merged file, without extension.

    Returns:
        The full path of the PDF file to write.
    """
    timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S_")
    # os.path.join keeps the path relative when cur_path is "" instead of
    # turning it into "/merge_output_folder/" at the filesystem root.
    dir_path = os.path.join(cur_path, "merge_output_folder")
    # exist_ok avoids the race between an existence check and the creation.
    os.makedirs(dir_path, exist_ok=True)
    return os.path.join(dir_path, timestamp + output_file_name + ".pdf")
if __name__ == "__main__":
    # Base name of the merged file; get_output_file_name() adds a timestamp
    # prefix and the ".pdf" extension.
    output_file_name = "name"
    # os.path.join keeps the path relative when cur_path is "" rather than
    # resolving "/merge_input_folder" against the filesystem root.
    input_folder = os.path.join(cur_path, "merge_input_folder")
    output_pdf_path = get_output_file_name(output_file_name)
    merge_pdfs(input_folder, output_pdf_path)
3. pdf拆分
import os
from PyPDF2 import PdfReader, PdfWriter
"""
介绍:
将指定路径下的pdf文件, 按照 split_num 的大小进行拆分, 并将拆分后的文件放到指定文件夹
使用方法:
1.修改 input_file 变量值为待拆分pdf文件的路径 (注意路径中斜线的方向, 建议使用正斜线 "/", 例如 E:/data/xxx.pdf)
2.运行代码
结果:
拆分后的文件在 split_output_folder 目录下
"""
# Directory containing this script. abspath() guards against a relative
# __file__ (e.g. "script.py" when run as a script on Python < 3.9): its
# dirname would be "", and cur_path + "/split_output_folder/" would then
# point at the filesystem root instead of the script's directory.
cur_path = os.path.dirname(os.path.abspath(__file__))
def split_pdf(input_pdf_path, output_pdfs_folder, split_num):
    """Split one PDF into consecutive parts of at most ``split_num`` pages.

    Each part is written to ``output_pdfs_folder`` as
    ``<input base name>_part_<first page>-<last page>.pdf`` (1-based page
    numbers), and its path is printed once it has been written.

    Args:
        input_pdf_path: Path of the PDF file to split.
        output_pdfs_folder: Folder that receives the parts; a trailing path
            separator is accepted but not required.
        split_num: Maximum number of pages per part; must be at least 1.

    Raises:
        ValueError: If ``split_num`` is smaller than 1.
    """
    # Validate first: a negative step would make range() empty and the call
    # would silently produce nothing; zero would fail with an unclear error.
    if split_num < 1:
        raise ValueError(f"split_num must be a positive integer, got {split_num!r}")

    input_pdf = PdfReader(input_pdf_path)
    total_pages = len(input_pdf.pages)
    base_name = os.path.splitext(os.path.basename(input_pdf_path))[0]

    for start in range(0, total_pages, split_num):
        end_page = min(start + split_num, total_pages)
        output_pdf = PdfWriter()
        for page_num in range(start, end_page):
            output_pdf.add_page(input_pdf.pages[page_num])
        # os.path.join works whether or not the folder ends with a separator.
        output_pdf_path = os.path.join(
            output_pdfs_folder, f"{base_name}_part_{start + 1}-{end_page}.pdf"
        )
        with open(output_pdf_path, "wb") as output_file:
            output_pdf.write(output_file)
        print(f"Created {output_pdf_path}")
def get_output_folder():
    """Return the split output folder, creating it when it is missing.

    Returns:
        The path of the ``split_output_folder`` directory under ``cur_path``,
        ending with a path separator so that a file name can be appended to
        it directly.
    """
    # The trailing "" component makes os.path.join end the result with a
    # separator; joining also keeps the path relative when cur_path is ""
    # instead of yielding "/split_output_folder/" at the filesystem root.
    dir_path = os.path.join(cur_path, "split_output_folder", "")
    # exist_ok avoids the race between an existence check and the creation.
    os.makedirs(dir_path, exist_ok=True)
    return dir_path
if __name__ == "__main__":
    # Path of the PDF file to split.
    input_file = "E:/data/2024_04_16_20_10_56_name.pdf"
    # Maximum number of pages in each output part.
    split_num = 30
    split_pdf(
        input_pdf_path=input_file,
        output_pdfs_folder=get_output_folder(),
        split_num=split_num,
    )