import json
from win32com import client as wc
def Read_PPTX(file):  # PPT PPTX PPTM PPS PPSM PPSX
    """Extract the text of a presentation by automating PowerPoint over COM.

    A ``PowerPoint.Application`` COM object is created through
    ``wc.Dispatch``, the presentation is opened, and every shape on every
    slide that reports ``HasTextFrame`` contributes its stripped
    ``TextFrame.TextRange.Text``.

    Args:
        file: Path of the presentation, passed unchanged to
            ``Presentations.Open``.

    Returns:
        A single string: for each slide, the ``str()`` of the list of its
        shape texts with the two-character escape ``\\r`` removed; the
        per-slide strings are concatenated without a separator.

    Raises:
        Whatever the COM layer raises while starting PowerPoint, opening
        the file, or reading shapes. ``Quit`` is still called in that case.
    """
    ppt = wc.Dispatch('PowerPoint.Application')
    # NOTE(review): Dispatch may attach to an already-running PowerPoint
    # instance, in which case Quit() below would close it too -- confirm.
    try:
        ppt.Visible = 0
        pptSel = ppt.Presentations.Open(file)
        slides = pptSel.Slides
        slide_texts = []
        # The collections are indexed from 1 here, as in the original loops.
        for i in range(1, slides.Count + 1):
            # Resolve the Shapes collection once per slide instead of
            # re-walking Slides(i).Shapes(j) for every property access.
            shapes = slides(i).Shapes
            content = []
            for j in range(1, shapes.Count + 1):
                shape = shapes(j)
                if shape.HasTextFrame:
                    content.append(shape.TextFrame.TextRange.Text.strip())
            slide_texts.append(content)
    finally:
        # Always shut PowerPoint down, even when opening or reading fails,
        # so a failed call does not leave the application running.
        ppt.Quit()
    # str(list) renders a carriage return inside a text as the two
    # characters backslash + 'r'; that escape is what gets stripped here.
    return ''.join(str(content).replace(r'\r', '') for content in slide_texts)
if __name__ == '__main__':
    # Sample input; the directory part of the path is masked in the source.
    path = r'******\测试.pptm'
    j = Read_PPTX(path)
    # Round-trip through GBK, silently dropping characters GBK cannot
    # encode (presumably so printing on a GBK console does not fail).
    gbk_bytes = j.encode('GBK', 'ignore')
    print(gbk_bytes.decode('GBK'))
# Pros: can read any type of PowerPoint file.
# Cons: slow to respond, requires Windows, and pops up a window when opening the file.
# Alternative approach that does not depend on Windows
import pptx
def read_PPTX(file_path):  # pptx, pptm
    """Return the concatenated run text of a presentation, filtered to GBK.

    The file is opened with ``pptx.Presentation``. For every shape that has
    a text frame, the text of each run in each paragraph is collected in
    document order and joined with no separator between runs, paragraphs,
    shapes or slides.

    Args:
        file_path: Path of the presentation, passed unchanged to
            ``pptx.Presentation``.

    Returns:
        The joined text after an encode/decode round trip through GBK with
        ``errors='ignore'``, i.e. characters GBK cannot encode are dropped.
    """
    presentation = pptx.Presentation(file_path)
    pieces = []
    for slide in presentation.slides:
        for shape in slide.shapes:
            if not shape.has_text_frame:
                continue
            for paragraph in shape.text_frame.paragraphs:
                pieces.extend(run.text for run in paragraph.runs)
    # Join once at the end instead of growing a string inside the loops.
    text = ''.join(pieces)
    return text.encode('GBK', 'ignore').decode('GBK')
# Demo run at module level: extract and print the text of a sample file
# (the directory part of the path is masked in the source).
file_path = r'****\测试.pptx'
print(read_PPTX(file_path=file_path))