之前得到了很多文章的pdf,现在需要把它们合到一个pdf文件中。
使用 Python 库 PyPDF2 可以实现这个功能。
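首先要安装 PyPDF2。注意:下面的示例使用的是旧版接口(PdfFileReader / PdfFileWriter),这些接口在 PyPDF2 3.0 中已被移除,因此需要安装 3.0 之前的版本:
pip install "PyPDF2<3.0"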
然后把下面的示例保存为 mergePDF.py,并在待合并的 pdf 所在目录下执行 python mergePDF.py
mergePDF.py
# -*- coding:utf-8*-
import os
import os.path
from PyPDF2 import PdfFileReader, PdfFileWriter
import time
import glob
def getFileName(filepath):
    """Return the paths of all ``*.pdf`` files directly inside *filepath*.

    ``glob.glob`` does not promise any particular ordering, so the matches
    are sorted explicitly. This makes the merge order (and therefore the
    bookmark order) lexicographic and reproducible across platforms.

    Args:
        filepath: Directory to search. Sub-directories are not searched.

    Returns:
        A lexicographically sorted list of path strings, each one prefixed
        with *filepath*. The list is empty when nothing matches.
    """
    # Lexicographic order puts "fun10.pdf" before "fun2.pdf"; callers that
    # need another order can re-sort the returned list with their own key.
    return sorted(glob.glob("{}/*.pdf".format(filepath)))
# ---------- Merge every PDF file found in a single folder ----------
def MergePDF(filepath, outfile, password="map"):
    """Merge all PDF files in *filepath* into one bookmarked PDF.

    Every input file contributes all of its pages, in the order returned by
    ``getFileName``, plus one bookmark that points at its first page and is
    titled with the file name minus its extension. Progress is printed to
    stdout.

    Args:
        filepath: Directory that holds the source PDFs. The merged file is
            written into this same directory.
        outfile: File name of the merged PDF, relative to *filepath*.
        password: Password tried on encrypted inputs. Defaults to ``"map"``,
            the value this script has always used.

    Returns:
        None. The result is the file written to ``filepath/outfile``.
    """
    output = PdfFileWriter()
    outputPages = 0
    out_path = os.path.join(filepath, outfile)
    out_key = os.path.normcase(os.path.abspath(out_path))

    # The result lands in the folder being scanned, so a second run would
    # otherwise merge the previous result into itself and truncate it while
    # it is still being read. Leave it out of the inputs.
    pdf_fileName = [
        name for name in getFileName(filepath)
        if os.path.normcase(os.path.abspath(name)) != out_key
    ]

    # Pages handed to the writer still refer to their source streams, so the
    # inputs must stay open until the merged file has been written.
    opened_streams = []
    try:
        for each_file in pdf_fileName:
            print("adding %s" % each_file)
            # Open the source PDF.
            stream = open(each_file, "rb")
            opened_streams.append(stream)
            reader = PdfFileReader(stream)
            # An encrypted PDF has to be decrypted before its pages are read.
            if reader.isEncrypted:
                reader.decrypt(password)
            # Number of pages in this source file.
            pageCount = reader.getNumPages()
            outputPages += pageCount
            print("%s has %d pages" % (each_file, pageCount))
            # Append each page to the output document.
            for iPage in range(pageCount):
                output.addPage(reader.getPage(iPage))
            # Bookmark the first page of this file. A file without pages has
            # no page for the bookmark to point at, so it gets none.
            if pageCount:
                title = os.path.splitext(os.path.basename(each_file))[0]
                output.addBookmark(
                    title=title, pagenum=outputPages - pageCount)
        print("All Pages Number: " + str(outputPages))
        # Finally write the merged PDF; `with` closes it even if write fails.
        with open(out_path, "wb") as outputStream:
            output.write(outputStream)
    finally:
        for stream in opened_streams:
            stream.close()
    print("finished")
if __name__ == '__main__':
    # Script entry point: merge the PDFs of one folder and report how long
    # the merge took.
    started = time.time()
    file_dir = '.'  # directory that holds the PDFs to merge
    out_file = u"test.pdf"  # file name of the merged PDF
    MergePDF(file_dir, out_file)
    elapsed = time.time() - started
    # The message below reads "total time elapsed" (in seconds).
    print(u'总共耗时: %.4f s' % elapsed)
结果输出如下:
D:\Program Files\phantomjs-2.1.1-windows\bin>python mergePDF.py
adding 3.pdf
3.pdf has 1 pages
adding 4.pdf
4.pdf has 1 pages
adding baidu.pdf
baidu.pdf has 1 pages
adding fun.pdf
fun.pdf has 1 pages
adding fun1.pdf
fun1.pdf has 1 pages
adding fun10.pdf
fun10.pdf has 8 pages
adding fun2.pdf
fun2.pdf has 1 pages
adding fun3.pdf
fun3.pdf has 1 pages
adding fun4.pdf
fun4.pdf has 8 pages
adding fun5.pdf
fun5.pdf has 1 pages
adding fun6.pdf
fun6.pdf has 1 pages
adding fun7.pdf
fun7.pdf has 8 pages
adding fun8.pdf
fun8.pdf has 1 pages
adding fun9.pdf
fun9.pdf has 1 pages
adding sai.pdf
sai.pdf has 1 pages
All Pages Number: 36
finished
总共耗时: 0.4700 s
生成的pdf效果如下: