因为一个需求,需要把pdf的第一页做成专辑封面,
然后数量很大,就用python来做这个事情,但是遇到了很多困难:
先说一下具体怎么做。
我的环境 win10 64位, python 2.7 32位
需要安装以下包:
pyPdf(pip可以安装)
wand(pip可以安装)
ImageMagick 6.7.7-6 Q16(Windows DLL 版安装包):http://101.96.10.64/ftp.icm.edu.pl/packages/ImageMagick/binaries/ImageMagick-6.7.7-6-Q16-windows-dll.exe
GPL Ghostscript 9.05(gs905w64 安装包):https://mirrors.netix.net/sourceforge/g/gh/ghostscript/GPL%20Ghostscript/9.05/gs905w64.exe
后两个包的版本一定不要装错,这是踩过无数坑之后得出的经验。下面是 Python 代码:
#coding=utf8
"""Turn one page of a PDF into a PNG image.

The page is first copied into a temporary single-page PDF with pyPdf,
then that PDF is rasterized with wand and written next to it as a PNG.
"""
import os

from wand.image import Image
from pyPdf import PdfFileReader, PdfFileWriter

path = r'C:\Users\guanjia\Desktop\2.PDF'


def _strip_pdf_extension(pdf_path):
    """Return ``pdf_path`` without its extension.

    ``os.path.splitext`` is used instead of ``str.split('.')`` so that
    paths containing additional dots (for example in a directory name)
    are handled correctly.

    Raises:
        ValueError: if the extension is not ``.pdf`` (case-insensitive).
    """
    root, ext = os.path.splitext(pdf_path)
    if ext.lower() != '.pdf':
        raise ValueError('文件必须是pdf')
    return root


def convert_all_pdf_pages_to_png(pdf_file_blob):
    """Render every page of a PDF into one tall PNG and return its bytes.

    The pages are stacked vertically, top to bottom, on a single canvas.
    To get an image of one page only, cut that page out first with
    ``getPagesOfPdf()`` and pass the resulting file's content here.

    Args:
        pdf_file_blob: the complete binary content of a PDF file.

    Returns:
        The PNG-encoded image as a binary blob.
    """
    # Both images are context-managed so their resources are released
    # as soon as the PNG blob has been produced.
    with Image(blob=pdf_file_blob) as pdf:
        page_count = len(pdf.sequence)
        with Image(width=pdf.width,
                   height=pdf.height * page_count) as canvas:
            for index in range(page_count):
                canvas.composite(
                    pdf.sequence[index],
                    top=pdf.height * index,
                    left=0,
                )
            return canvas.make_blob('png')


def getPagesOfPdf(path, page_index=0):
    """Copy one page of a PDF into a new ``<name>_thumb.pdf`` file.

    Args:
        path: path of the source PDF; must end in ``.pdf``.
        page_index: zero-based index of the page to copy (default: the
            first page).

    Returns:
        The path of the single-page PDF that was written.

    Raises:
        ValueError: if ``path`` does not have a ``.pdf`` extension.
    """
    tmp_pdf_path = _strip_pdf_extension(path) + '_thumb.pdf'
    # The source stays open until the writer has finished, because the
    # page object handed to the writer still refers to the reader's stream.
    with open(path, 'rb') as source:
        writer = PdfFileWriter()
        writer.addPage(PdfFileReader(source).getPage(page_index))
        with open(tmp_pdf_path, 'wb') as target:
            writer.write(target)
    return tmp_pdf_path


def createPng(tmp_pdf_path):
    """Render a PDF to ``<name>.png`` and delete the PDF afterwards.

    Args:
        tmp_pdf_path: path of the PDF to render; must end in ``.pdf``.
            The file is removed once the PNG has been written.

    Raises:
        ValueError: if ``tmp_pdf_path`` does not have a ``.pdf`` extension.
    """
    png_path = _strip_pdf_extension(tmp_pdf_path) + '.png'
    with open(tmp_pdf_path, 'rb') as pdf_file:
        png_blob = convert_all_pdf_pages_to_png(pdf_file.read())
    # Convert before opening the output so that a failed conversion does
    # not leave an empty PNG behind.
    with open(png_path, 'wb') as png_file:
        png_file.write(png_blob)
    # Remove only after the handle is closed; an open file cannot be
    # deleted on Windows.
    os.remove(tmp_pdf_path)


if __name__ == '__main__':
    tmp_pdf_path = getPagesOfPdf(path)  # cut the first page out of the PDF
    createPng(tmp_pdf_path)  # render that single-page PDF as a PNG