PDF合并,PDF拆分,PDF转换
由于工作需要处理PDF文件,我在网上找了一些工具,但都不太方便,有的甚至需要付费才能使用。于是我多方整理资料,终于可以用Python自己解决了。
首先导入需要的Python库(需先安装 PyPDF2、pdf2docx、PyMuPDF(即 fitz)和 Pillow(即 PIL)),如下:
import os
import datetime
import PyPDF2
from PyPDF2 import PdfMerger
from pdf2docx import Converter
import fitz
from PIL import Image
1、PDF合并
def pdf_merger():
    """Merge every PDF found under the source folder into one output PDF.

    The source tree ``D:\\工程材料\\`` is walked recursively and each file whose
    name ends in ``.pdf`` (case-insensitive) is appended to the merged
    document, in the order ``os.walk`` yields them. The result is written to
    ``D:\\advance\\pdf合并文件_.pdf``.

    Files without a ``.pdf`` extension are skipped so that stray files in the
    folder do not abort the whole merge.
    """
    pdf_path = 'D:\\工程材料\\'
    outfile = 'pdf合并文件_'
    pdf_name = 'D:\\advance\\' + outfile + '.pdf'

    pdf_merger_ = PdfMerger()
    try:
        for root, _dirs, files in os.walk(pdf_path):
            for f in files:
                if not f.lower().endswith('.pdf'):
                    continue
                f_ = os.path.join(root, f)
                print(f_)
                pdf_merger_.append(f_)
        pdf_merger_.write(pdf_name)
    finally:
        # The merger keeps every appended input file open until it is closed,
        # so release the handles even when appending or writing fails.
        pdf_merger_.close()
2、PDF拆分
def pdf_split():
    """Split a fixed PDF into one single-page PDF per page.

    Reads ``D:\\查勘.pdf`` and writes each page to
    ``D:\\advance\\pdf拆分文件_<index>.pdf``, where ``<index>`` is the
    zero-based page number. The index is printed as progress output.
    """
    source_pdf = 'D:\\查勘.pdf'
    prefix = 'pdf拆分文件_'
    reader = PyPDF2.PdfReader(source_pdf)

    for index, page in enumerate(reader.pages):
        print(index)
        # A fresh writer per page so each output file holds exactly one page.
        single_page_writer = PyPDF2.PdfWriter()
        single_page_writer.add_page(page)
        target = 'D:\\advance\\' + prefix + str(index) + '.pdf'
        single_page_writer.write(target)
3、PDF转换成word文件
def pdf_to_docx():
    """Convert the leading pages of a fixed PDF into a Word document.

    Reads ``D:\\设计费.pdf`` and writes ``D:\\advance\\pdf转换为word文件_.docx``.
    The converter is always closed, even when the conversion raises.
    """
    pdf_name = 'D:\\设计费.pdf'
    outfile = 'pdf转换为word文件_'
    docx_name = 'D:\\advance\\' + outfile + '.docx'

    cv = Converter(pdf_name)
    try:
        # Page selection alternatives offered by Converter.convert:
        #   start=0, end=None   -> from the first page through the last page
        #   pages=[0, 2, 5]     -> an explicit list of page indices
        # Here only the range start=0 .. end=2 is converted (per the pdf2docx
        # docs the indices are zero-based and `end` is exclusive).
        cv.convert(docx_name, start=0, end=2)
    finally:
        cv.close()
4、PDF转换为图片
def pdf_to_jpe():
    """Render every page of a fixed PDF to a PNG image.

    Reads ``D:\\查勘.pdf`` and writes each page to
    ``D:\\advance\\pdf转换为jpe_<index>.png``, where ``<index>`` is the
    zero-based page number. The index is printed as progress output.

    The page count is taken from the already-open PyMuPDF document instead of
    parsing the file a second time, and the document is closed when done.
    """
    pdf_name1 = 'D:\\查勘.pdf'
    outfile = 'pdf转换为jpe_'
    # Matrix(2, 2) scales both axes by 2; this gives acceptable sharpness, and
    # larger factors produce sharper (and bigger) images.
    zoom = fitz.Matrix(2, 2)

    with fitz.open(pdf_name1) as doc:
        for num in range(len(doc)):
            print(num)
            page = doc.load_page(num)
            pix = page.get_pixmap(matrix=zoom, alpha=False)
            pdf_name2 = 'D:\\advance\\' + outfile + str(num) + '.png'
            pix.save(pdf_name2)
5、多个图片单独转换为多个PDF
def jpe_to_pdf_single():
    """Convert each image under the source folder into its own PDF.

    The tree ``D:\\图片\\`` is walked recursively; every file is opened as an
    image and saved to ``D:\\advance\\jpe转换为pdf文件_<filename>.pdf``, where
    ``<filename>`` is the original file name including its extension.

    Because the output name uses only the file name, images with the same
    name in different sub-folders write to the same PDF path.
    """
    jpe_path = 'D:\\图片\\'
    outfile = 'jpe转换为pdf文件_'

    for root, _dirs, files in os.walk(jpe_path):
        for f in files:
            f_ = os.path.join(root, f)
            print(f_)
            pdf_name = 'D:\\advance\\' + outfile + str(f) + '.pdf'
            # Close each image as soon as it is saved so file handles do not
            # pile up while walking a large folder.
            with Image.open(f_) as img_file:
                img_file.save(pdf_name, 'pdf', save_all=True)
6、多个图片合并转换为一个PDF
def jpe_to_pdf_all():
    """Combine all images under the source folder into one multi-page PDF.

    The tree ``D:\\图片\\`` is walked recursively and every file is opened as
    an image. The first image found becomes the first page (the cover) and
    the remaining images follow in the order ``os.walk`` yields them. The
    result is written to ``D:\\advance\\jpe转换为pdf文件.pdf``.

    Raises:
        FileNotFoundError: If the source folder contains no files, since
            there is then no image to use as the first page.
    """
    jpe_path = 'D:\\图片\\'
    outfile = 'jpe转换为pdf文件'
    pdf_name = 'D:\\advance\\' + outfile + '.pdf'

    file_list = []
    for root, _dirs, files in os.walk(jpe_path):
        for f in files:
            f_ = os.path.join(root, f)
            print(f_)
            file_list.append(f_)

    if not file_list:
        raise FileNotFoundError('no image files found under ' + jpe_path)

    opened_images = []
    try:
        for image_path in file_list:
            opened_images.append(Image.open(image_path))
        # The first image is the cover page; the rest are appended after it.
        pdf_first = opened_images[0]
        image_list = opened_images[1:]
        pdf_first.save(pdf_name, 'pdf', save_all=True, append_images=image_list)
    finally:
        # Release every image file handle, whether or not saving succeeded.
        for img_file in opened_images:
            img_file.close()