1. pdf转图片及pptx
# -*- coding: utf-8 -*-
from pptx import Presentation
from pdf2image import convert_from_path, convert_from_bytes
from pdf2image.exceptions import (
PDFInfoNotInstalledError,
PDFPageCountError,
PDFSyntaxError
)
from sys import argv
from PIL import Image
from pptx.enum.shapes import MSO_SHAPE
from pptx.util import Inches, Pt
from pptx.dml.color import RGBColor
from pathlib import Path
# Convert <name>.pdf in the current directory into PNG page images (under
# ./jpgs) and a <name>.pptx deck with one full-height picture per slide.
#
# Usage: python <script> <name> <dpi> <W*H>
#   <name>  PDF file name without the ".pdf" suffix
#   <dpi>   rendering resolution handed to pdf2image
#   <W*H>   rendered page size in units of 100 px, e.g. "16*9" -> 1600x900
if len(argv) < 4:
    # Fail with a readable message instead of a bare IndexError.
    raise SystemExit(
        "usage: %s <pdf-name-without-extension> <dpi> <width*height>" % argv[0]
    )

fff = argv[1]
pp = Path.cwd()
fn = fff + '.pdf'
filename = pp / fn
print(filename)
print("Creating %s" % fff)

prs = Presentation()
width, height = argv[3].split('*')
# The slide is always 16x9 inches, independent of the requested render size.
prs.slide_width = Inches(16)
prs.slide_height = Inches(9)
print(argv[2], width, height)

pages = convert_from_path(
    filename,
    dpi=int(argv[2]),
    size=(int(width) * 100, int(height) * 100),
)

# Despite the directory name, the page images are written as PNG.
jpgs = pp / 'jpgs'
jpgs.mkdir(exist_ok=True)  # no check-then-create race

# Layout index 6 is presumably the blank layout of the default template.
slide_layout = prs.slide_layouts[6]

for index, page in enumerate(pages):
    name = fff + "-(%d).png" % index
    jpg_file = jpgs / name

    # Turn portrait pages to landscape before saving, so the image is written
    # once and never has to be reopened from disk.
    if page.height > page.width:
        page = page.rotate(270, expand=True)
    page.save(jpg_file, 'PNG')

    slide = prs.slides.add_slide(slide_layout)
    left = top = 0
    jpg_file = str(jpg_file)
    print(jpg_file)
    # Only the height is fixed; the width follows the image aspect ratio.
    slide.shapes.add_picture(jpg_file, left, top, height=prs.slide_height)

pptname = '%s.pptx' % fff
prs.save(str(pp / pptname))
print("Saved")
#use
2. 批量ppt转pdf
import comtypes.client
import os
def init_powerpoint():
    """Create a PowerPoint COM application object, make it visible, and return it."""
    app = comtypes.client.CreateObject("Powerpoint.Application")
    app.Visible = 1
    return app
def ppt_to_pdf(powerpoint, inputFileName, outputFileName, formatType = 32):
    """Open a presentation with PowerPoint and save it in another format.

    Args:
        powerpoint: PowerPoint application object; must expose
            ``Presentations.Open``.
        inputFileName: Path of the presentation to open.
        outputFileName: Destination path. ``".pdf"`` is appended unless the
            name already ends with it (compared case-insensitively).
        formatType: Format code passed to ``SaveAs``; 32 is the code for PDF.
    """
    if not outputFileName.lower().endswith('.pdf'):
        outputFileName = outputFileName + ".pdf"
    deck = powerpoint.Presentations.Open(inputFileName)
    try:
        deck.SaveAs(outputFileName, formatType)
    finally:
        # Close the deck even when the export fails, so it is not left open.
        deck.Close()
def convert_files_in_folder(powerpoint, folder):
    """Convert every .ppt/.pptx file directly inside *folder* to PDF.

    Each PDF is written next to its source, with ".pdf" appended to the full
    source file name (e.g. "deck.pptx" -> "deck.pptx.pdf").

    Args:
        powerpoint: PowerPoint application object handed to ``ppt_to_pdf``.
        folder: Directory to scan; subdirectories are not visited.
    """
    # Use absolute paths: a relative path would be resolved by the PowerPoint
    # process, not necessarily against this script's working directory.
    folder = os.path.abspath(folder)
    for pptfile in os.listdir(folder):
        # Match the extension case-insensitively so "DECK.PPTX" is included.
        if not pptfile.lower().endswith((".ppt", ".pptx")):
            continue
        # Join against the folder argument, not a module-level global.
        fullpath = os.path.join(folder, pptfile)
        ppt_to_pdf(powerpoint, fullpath, fullpath)
if __name__ == "__main__":
    # Convert every presentation in the current working directory to PDF.
    powerpoint = init_powerpoint()
    try:
        cwd = os.getcwd()
        convert_files_in_folder(powerpoint, cwd)
    finally:
        # Always shut PowerPoint down, even if a conversion fails, so no
        # application process is left running.
        powerpoint.Quit()
运行上面的脚本时可能会遇到以下问题:在 Windows 上安装 pdf2image 后,运行时报 PDFInfoNotInstalledError。解决办法如下。
安装pdf2image,安装没有报错,运行之后报错:
pdf2image.exceptions.PDFInfoNotInstalledError: Unable to get page count. Is poppler installed and in PATH?
一开始是想直接安装PDFInfo,或者poppler,但是都安装失败。按照网友提示安装python-poppler也因为ndk版本不对失败。
最终解决办法:
1、首先通过poppler-windows下载地址下载压缩包
2、然后解压到自定义文件目录,随后添加到环境变量。
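3、最后,修改已安装的 pdf2image 包内 pdf2image.py 文件中 convert_from_path 函数的 poppler_path 默认值(也可以不修改库文件,而是在调用 convert_from_path 时直接传入 poppler_path 参数):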
def convert_from_path(
pdf_path,
dpi=200,
output_folder=None,
first_page=None,
last_page=None,
fmt="ppm",
jpegopt=None,
thread_count=1,
userpw=None,
use_cropbox=False,
strict=False,
transparent=False,
single_file=False,
output_file=uuid_generator(),
poppler_path=r'D:\poppler-0.68.0\bin', #将这里改为你解压安装的poppler路径
grayscale=False,
size=None,
paths_only=False,
use_pdftocairo=False,
timeout=None,
保存即可。