需求
几行代码快速合并PDF文件,免去收费烦恼,多个PDF文件合并,每个文件名当做书签,每个文件的首页当做书签页,可快速定位至浏览位置。
给一个文件夹名称,自动遍历文件夹下的所有PDF文件,如需按序合并,请手动修改文件名,可打印 FileNameWithPath 信息查看PDF导入的顺序。
库
- PyPDF2
- fitz
- PyMuPDF
换源安装
- pip install PyPDF2 -i https://pypi.doubanio.com/simple
- pip install fitz -i https://pypi.doubanio.com/simple
- pip install PyMuPDF -i https://pypi.doubanio.com/simple
整体代码
import PyPDF2
import os
import re
import fitz
# pip install PyPDF2 -i https://pypi.doubanio.com/simple
# ModuleNotFoundError: No module named 'fitz'
# pip install fitz -i https://pypi.doubanio.com/simple
# ModuleNotFoundError: No module named 'frontend'
# pip install PyMuPDF -i https://pypi.doubanio.com/simple
def GetNameByEveryDir(file_dir, Property):
    """Recursively collect the files under *file_dir* that have a wanted extension.

    Args:
        file_dir: Root directory to walk.
        Property: A single extension such as ``'.pdf'`` or a collection of
            extensions; each one includes the leading dot and is compared
            exactly (case-sensitively) with ``os.path.splitext(name)[1]``.

    Returns:
        A tuple ``(FileName, FileNameWithPath, FileDir)`` of parallel lists:
        the bare file names, the names joined with their directory, and each
        directory expressed as the remainder of its path after *file_dir*.
        Directories and files are visited in sorted name order, so the result
        is deterministic and can be controlled by renaming files.
    """
    # A bare string would turn `in` into a substring test, which lets files
    # with no extension ('') or with '.p' match '.pdf'; wrap it in a tuple.
    if isinstance(Property, str):
        extensions = (Property,)
    else:
        extensions = tuple(Property)

    FileNameWithPath = []
    FileName = []
    FileDir = []
    for root, dirs, files in os.walk(file_dir):
        # Sorting `dirs` in place makes os.walk descend in name order.
        dirs.sort()
        for file in sorted(files):
            if os.path.splitext(file)[1] in extensions:
                FileNameWithPath.append(os.path.join(root, file))  # full path
                FileName.append(file)  # file name only
                FileDir.append(root[len(file_dir):])  # directory relative to file_dir
    return FileName, FileNameWithPath, FileDir
def add_page_index():
    """Add the collected bookmarks to the merged PDF and save it as a new file.

    Reads the module-level globals ``new_path`` (output directory) and ``tocs``
    (list of ``[level, title, page]`` entries). Opens ``merge.pdf`` in
    ``new_path``, keeps the table of contents it already has, appends every
    entry of ``tocs`` to it, and writes the result to
    ``merge_add_page_index.pdf`` in the same directory.
    """
    # The Windows-style separator is kept so this is the same path string
    # that add_pdf() writes the merged file to.
    doc = fitz.open(new_path + '\\merge.pdf')
    try:
        # Newer PyMuPDF releases name these get_toc/set_toc; older ones only
        # have getToC/setToC. Use whichever the installed version provides.
        get_toc = getattr(doc, 'get_toc', None) or doc.getToC
        set_toc = getattr(doc, 'set_toc', None) or doc.setToC

        # Start from the document's existing TOC so it is left unchanged.
        toc = get_toc()
        toc.extend(tocs)
        set_toc(toc)
        doc.save(new_path + '\\merge_add_page_index.pdf')
    finally:
        # Release the document even if reading, updating or saving fails.
        doc.close()
def add_pdf():
    """Merge every PDF found under ``path`` and record one bookmark per file.

    Reads the module-level globals ``path`` (directory searched for ``.pdf``
    files via ``GetNameByEveryDir``) and ``new_path`` (output directory), and
    rebinds the global ``tocs`` to a list of ``[level, title, page]`` entries:
    level is always 1, title is the file name without its extension, and page
    is the 1-based page in the merged document where that file starts.
    The merged document is written to ``merge.pdf`` inside ``new_path``.
    """
    global tocs
    FileName, FileNameWithPath, FileDir = GetNameByEveryDir(path, '.pdf')

    # Newer PyPDF2 releases name these PdfMerger/PdfReader; fall back to the
    # legacy PdfFileMerger/PdfFileReader names on older installations.
    merger_cls = getattr(PyPDF2, 'PdfMerger', None) or PyPDF2.PdfFileMerger
    reader_cls = getattr(PyPDF2, 'PdfReader', None) or PyPDF2.PdfFileReader

    pdfFM = merger_cls()
    opened_file = []
    page_sum = 1  # 1-based page number where the next file will start
    tocs = []  # [[level, title, page]]
    try:
        for name, full_name in zip(FileName, FileNameWithPath):
            handle = open(full_name, 'rb')
            # Track the handle immediately so `finally` closes it on failure.
            opened_file.append(handle)

            # splitext drops the extension whatever its length.
            tocs.append([1, os.path.splitext(name)[0], page_sum])

            reader = reader_cls(handle)
            page_sum += len(reader.pages)  # page count of this file
            pdfFM.append(handle)

        with open(new_path + "\\merge.pdf", 'wb') as write_out_file:
            pdfFM.write(write_out_file)
    finally:
        # The inputs stay open until the merged file is written, then are
        # always closed, even if reading or writing raised.
        for handle in opened_file:
            handle.close()
if __name__ == '__main__':
    # Script entry point: merge the PDFs under `path`, then add bookmarks.
    # These module-level names are the globals read by add_pdf() and
    # add_page_index(); no `global` statement is needed at module scope.
    tocs = []
    path = '.\\test'  # directory containing all the PDF files to merge
    new_path = '.\\merge_dir'  # directory that receives the merged files

    # exist_ok replaces the separate existence check and its race window.
    os.makedirs(new_path, exist_ok=True)

    add_pdf()
    add_page_index()

    # Open the output folder when finished. `start explorer` is a Windows
    # shell command, so skip it on other platforms.
    if os.name == 'nt':
        os.system("start explorer %s" % new_path)
注意
该脚本在windows下测试,在linux下使用可能需要更改路径或者修改源码。