有些 PDF 文件虽然经过加密,但并没有设置打开密码(密码为空)。下面提供一个针对这类文件的解密方案。
import PyPDF4
import pikepdf
import fitz
def jiemi(pdfpath):
    """Write a decrypted copy of an encrypted PDF that opens with an empty password.

    The file is first inspected with PyPDF4 to find out whether it is
    encrypted. If it is, pikepdf opens it with an empty password and saves
    a copy next to the original, with ``_new`` inserted before the file
    extension (``report.pdf`` -> ``report_new.pdf``).

    Args:
        pdfpath: Path of the PDF file to inspect.

    Returns:
        The path of the decrypted copy when the input is encrypted;
        otherwise ``pdfpath`` itself, because no copy is written and the
        original is already readable.

    Raises:
        Whatever ``open``, PyPDF4 or pikepdf raise, e.g. when the file is
        missing or when the empty password is not accepted.
    """
    import os  # function-scope import so the block does not rely on file-level imports

    # splitext instead of fixed [:-4] slicing: same result for "*.pdf",
    # and still sensible for paths with a different or missing extension.
    root, ext = os.path.splitext(pdfpath)
    new_pdfpath = root + '_new' + ext

    # Read-only access is enough to inspect the encryption flag; the handle
    # is released before pikepdf opens the same path.
    with open(pdfpath, "rb") as fp:
        is_encrypted = PyPDF4.pdf.PdfFileReader(fp).isEncrypted

    if not is_encrypted:
        # Nothing to decrypt and nothing is written, so hand back a path
        # that actually exists.
        return pdfpath

    # Decrypt: open with an empty password and save an unencrypted copy.
    with pikepdf.open(pdfpath, password='') as pdf:
        pdf.save(new_pdfpath)
    return new_pdfpath
将 PDF 中的每一页保存为图片:
def pdf_image(pdf_name):
    """Render every page of a PDF to an image file and return the file paths.

    Each page is rendered with a 3x zoom on both axes and no alpha channel,
    then written next to the PDF as ``<pdf path without extension>_<n>.jpg``,
    where ``n`` is the 1-based page number.

    Args:
        pdf_name: Path of the PDF file to render.

    Returns:
        A list of the written image paths, in page order.
    """
    import os  # function-scope import so the block does not rely on file-level imports

    # NOTE(review): pageCount / getPixmap / writePNG look like the older
    # camelCase PyMuPDF names -- confirm they exist in the installed version.
    root, _ext = os.path.splitext(pdf_name)

    # The zoom matrix is the same for every page, so build it once.
    zoom = fitz.Matrix(3.0, 3.0)

    img_paths = []
    pdf = fitz.Document(pdf_name)
    try:
        for pg in range(pdf.pageCount):
            page = pdf[pg]  # page object for this index
            pm = page.getPixmap(matrix=zoom, alpha=False)  # rendered pixmap of the page
            # NOTE(review): the file is named ".jpg" but written via writePNG.
            # The name is kept so existing callers see the same paths --
            # confirm whether the extension should become ".png".
            img_path = root + '_' + str(pg + 1) + '.jpg'
            pm.writePNG(img_path)  # save the image
            img_paths.append(img_path)
    finally:
        # Release the document even if rendering or writing a page fails.
        pdf.close()
    return img_paths