如何从pptx中提取所有图片?用python-pptx轻松实现图片提取
- 从指定的文件夹中,对所有pptx(注意不是ppt,因为两者文档格式不同)进行图片提取。
- 提取出来的图片以原有名称作为文件名;如果遇到相同的文件名,则在文件名后加上数字以示区分。图片保存在程序中设定的targetPath下、以pptx文件名(不含扩展名)命名的子文件夹中;如果目录不存在,则会先创建。
import os
import re,random
from pptx import Presentation
# Note: the original script declared "coding=gbk"; the imports it needs are the ones listed above.
class ExtractPPTXimg():
    """Extract the pictures embedded in every .pptx file of one folder.

    Creating an instance runs the whole job immediately: each .pptx file
    found directly inside ``params["sourcePath"]`` is opened with python-pptx
    and the pictures found on its slides are written to
    ``<targetPath>/<pptx name without extension>/``.

    Attributes:
        errFlag: True when the source folder does not exist.
        msg: User-facing error text; empty when no error occurred.
        sourcePath: Folder that is scanned for .pptx files.
        targetPath: Folder under which the per-presentation image folders
            are created.
    """

    def __init__(self, params):
        """Record the folders, create the target folder and start extracting.

        Args:
            params: Mapping holding the keys ``"sourcePath"`` and
                ``"targetPath"``.

        Raises:
            KeyError: If one of the two keys is missing from ``params``.
        """
        self.errFlag = False
        self.msg = ""
        self.sourcePath = params["sourcePath"]
        if not os.path.exists(self.sourcePath):
            self.errFlag = True
            self.msg = "源文件夹不存在!"
        self.targetPath = params["targetPath"]
        if not os.path.exists(self.targetPath):
            os.makedirs(self.targetPath)
        self.run()

    @staticmethod
    def _isPptxFile(name):
        """Return True when ``name`` is a .pptx file that should be processed.

        Names starting with ``~`` are skipped (presumably the temporary lock
        files Office writes next to an open document -- TODO confirm). The
        extension is compared case-insensitively and must include the dot, so
        ``DECK.PPTX`` is accepted and ``notespptx`` is rejected.
        """
        if name.startswith("~"):
            return False
        return name.lower().endswith(".pptx")

    def run(self):
        """Extract the images of every .pptx file in the source folder.

        Prints the stored error message and returns without doing anything
        when the constructor flagged the source folder as missing.
        """
        if self.errFlag:
            print(self.msg)
            return
        for file in os.listdir(self.sourcePath):
            if self._isPptxFile(file):
                self.extractImg(file)

    def extractImg(self, file):
        """Save every picture found on the slides of one presentation.

        Args:
            file: File name (not a full path) of a .pptx file located in
                ``self.sourcePath``.

        Only the top-level shapes of each slide are inspected. Shapes that
        expose no image are skipped silently; a picture that cannot be
        written is reported on stdout and the extraction carries on.
        """
        fileName = os.path.splitext(file)[0]
        prs = Presentation(os.path.join(self.sourcePath, file))
        newPath = os.path.join(self.targetPath, fileName)
        for slide in prs.slides:
            for shape in slide.shapes:
                try:
                    image = shape.image
                except Exception:
                    # Shapes without an embedded picture fail here; they are
                    # simply not of interest. ``Exception`` (rather than a
                    # bare except) lets Ctrl-C / SystemExit through.
                    continue
                try:
                    if "image" not in image.content_type:
                        continue
                    # Fall back to a generic name if the image carries none.
                    imgName = image.filename or "image.{}".format(image.ext)
                    # The folder is only created once there is something to
                    # put in it, so picture-less decks leave no empty folder.
                    os.makedirs(newPath, exist_ok=True)
                    self.saveImage(os.path.join(newPath, imgName), image.blob)
                except Exception as err:
                    # Best effort: one bad picture must not stop the batch,
                    # but the user should learn that it was skipped.
                    print("跳过{}中无法保存的图片:{}".format(file, err))

    def saveImage(self, newFile, blob):
        """Write ``blob`` to ``newFile`` without overwriting an existing file.

        When ``newFile`` is already taken, ``-1``, ``-2``, ... is inserted
        before the extension until an unused name is found.

        Args:
            newFile: Desired path of the image file.
            blob: Raw image bytes.

        Returns:
            The path the image was actually written to.
        """
        stem, ext = os.path.splitext(newFile)
        candidate = newFile
        counter = 0
        while True:
            try:
                # "x" creates the file exclusively, so an existing file is
                # never overwritten.
                with open(candidate, "xb") as f:
                    f.write(blob)
                break
            except FileExistsError:
                counter += 1
                candidate = "{}-{}{}".format(stem, counter, ext)
        print("已保存{}".format(candidate))
        return candidate

    def __str__(self):
        """Return the stored error message (empty when there was no error)."""
        return self.msg
if __name__ == '__main__':
    # "sourcePath": folder holding the .pptx files to process.
    # "targetPath": folder that receives the extracted images.
    params = dict(
        sourcePath=r"D:\自动化",
        targetPath=r"D:\自动化\img",
    )
    # Constructing the object performs the extraction.
    newobj = ExtractPPTXimg(params)