from PyPDF2 import PdfFileReader, PdfFileWriter
def merge_pdfs(paths, output):
    """Concatenate several PDF files into a single PDF.

    Pages are appended in the order the input files are given, and in
    page order within each file.

    Args:
        paths: Iterable of PDF sources accepted by ``PdfFileReader``,
            in the order they should appear in the result.
        output: Path of the merged PDF file to create; it is opened in
            ``'wb'`` mode, so an existing file is overwritten.
    """
    merged = PdfFileWriter()

    for source in paths:
        reader = PdfFileReader(source)
        page_count = reader.getNumPages()
        # Copy every page of this input into the shared writer.
        for index in range(page_count):
            merged.addPage(reader.getPage(index))

    # Write the result once, after all inputs have been collected.
    with open(output, 'wb') as sink:
        merged.write(sink)
# Read the files in a folder with Python
import os
# Script: collect the PDFs in one folder, order them numerically by file
# name, and merge them into a single output file.
path = "/Users/bella/Downloads/pdf"  # folder that holds the PDFs to merge

# Keep only visible PDF files. Hidden entries (for example ".DS_Store")
# have nothing before the first '.', which would make the numeric sort
# key below raise ValueError, and they are not PDFs anyway.
files = [
    name
    for name in os.listdir(path)
    if name.lower().endswith('.pdf') and not name.startswith('.')
]

# Sort! os.listdir() returns entries in arbitrary order, so the list is
# sorted exactly once here and never re-read afterwards (re-listing the
# folder would throw the ordering away). The key is the text before the
# first '.', converted to int; int() accepts the '_' separators in names
# such as '2400_001' (Python 3.6+).
files.sort(key=lambda x: int(x.split('.')[0]))
# Example of the first five names after sorting:
#   ['2400_001.pdf', '2401_001.pdf', '2402_001.pdf',
#    '2403_001.pdf', '2404_001.pdf']

# Build the full path of every file, preserving the sorted order.
paths = [os.path.join(path, name) for name in files]
# Example of the first five paths:
#   ['/Users/bella/Downloads/pdf/2400_001.pdf',
#    '/Users/bella/Downloads/pdf/2401_001.pdf',
#    '/Users/bella/Downloads/pdf/2402_001.pdf',
#    '/Users/bella/Downloads/pdf/2403_001.pdf',
#    '/Users/bella/Downloads/pdf/2404_001.pdf']

# NOTE(review): the merged file contains PDF data, but the output name
# ends in ".doxc" -- confirm whether ".pdf" was intended.
merge_pdfs(paths, output='大数据部·数据应用部·32491·孙李白《护照》.doxc')