# 一、只输出文本内容 (Part 1: export text content only)
from pptx import Presentation
from docx import Document
# Copy the text of a PowerPoint file into a new Word document: every
# paragraph of every text-bearing shape becomes one Word paragraph.
filepath = r"C:\Users\18703\Desktop\智家经分\智家经分v2.pptx"
save_path = r"C:\Users\18703\Desktop\智家经分\智家经分.docx"

wordfile = Document()
pptx = Presentation(filepath)
for slide in pptx.slides:
    for shape in slide.shapes:
        # Shapes without a text frame contribute nothing to the output.
        if not shape.has_text_frame:
            continue
        for paragraph in shape.text_frame.paragraphs:
            # The paragraph text is appended as-is, empty paragraphs included.
            wordfile.add_paragraph(paragraph.text)
wordfile.save(save_path)
# 二、输出文本、表格、图片内容 (Part 2: output text, table and picture content)
import pandas as pd
from pptx import Presentation
from pptx.shapes.picture import Picture
# Walk every shape on every slide and dump its content: text paragraphs and
# table cells are printed, pictures are written to files in the working
# directory and the name of each written file is printed.
prs = Presentation(r"C:\Users\18703\Desktop\智家经分\智家经分v2.pptx")
index = 1  # running number used to name the exported picture files
# NOTE(review): wordfile is created but never written to in this section;
# kept in case code outside this view relies on it.
wordfile = Document()
for slide in prs.slides:
    for shape in slide.shapes:
        if shape.has_text_frame:
            for paragraph in shape.text_frame.paragraphs:
                # Empty paragraphs are not printed.
                if paragraph.text:
                    print(paragraph.text)
        elif shape.has_table:
            # One inner list per table row, holding the text of each cell.
            one_table_data = [
                [cell.text for cell in row.cells] for row in shape.table.rows
            ]
            print(one_table_data)
        elif isinstance(shape, Picture):
            # The blob is the image's original bytes, so the file extension
            # must follow the image's real format instead of always ".jpg".
            image_name = f"{index}.{shape.image.ext}"
            with open(image_name, "wb") as f:
                f.write(shape.image.blob)
            index += 1
            # Report the saved file name rather than the closed file object.
            print(image_name)