先导入所需的Python包:
import io
import os

from docx import Document
from docx.shared import Inches
from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE_TYPE
# author
# terminator
注意:pptx 和 docx 这两个包需要通过以下命令安装(PyPI 上的包名分别是 python-pptx 和 python-docx):
pip install python-pptx
pip install python-docx
由于 PPT 中通常同时包含图像和文字,需要分别对它们进行处理:
对于文字:
# Copy the text of every shape that carries non-blank text into the Word
# document. `ppt` is the opened presentation and `doc` the target document.
for slide in ppt.slides:
    for shape in slide.shapes:
        if shape.has_text_frame:
            text = shape.text
            if text.strip():
                # python-pptx reports a soft line break inside a paragraph as
                # "\v", which is not a legal XML character; map it to "\n",
                # which python-docx writes as a line break.
                doc.add_paragraph(text.replace("\v", "\n"))
对于图像:
if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:  # the shape is a picture
    # Hand the image bytes to python-docx through an in-memory stream, so no
    # temporary file has to be written to (and removed from) the working
    # directory.
    img_stream = io.BytesIO(shape.image.blob)
    doc.add_picture(img_stream, width=Inches(4))  # insert the picture 4 inches wide
完整代码:
def ppt_to_word(ppt_filename, word_filename) -> None:
    """Copy the text and pictures of a PowerPoint file into a new Word document.

    Every shape of every slide is visited in iteration order. A shape with
    non-blank text is appended to the document as one paragraph; a picture
    shape is appended as an inline image 4 inches wide. Other shape content
    is not copied.

    Args:
        ppt_filename: Location of the presentation, passed to ``Presentation``.
        word_filename: Location the new document is saved to.
    """
    # Open the source presentation.
    ppt = Presentation(ppt_filename)
    # Start from a new, empty Word document.
    doc = Document()

    for slide in ppt.slides:
        for shape in slide.shapes:
            if shape.has_text_frame:
                text = shape.text
                if text.strip():
                    # python-pptx reports a soft line break inside a paragraph
                    # as "\v", which is not a legal XML character; map it to
                    # "\n", which python-docx writes as a line break.
                    doc.add_paragraph(text.replace("\v", "\n"))

            if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
                # Hand the image bytes over through an in-memory stream rather
                # than a fixed-name temporary file: nothing in the working
                # directory is overwritten, and nothing is left behind if
                # add_picture raises.
                img_stream = io.BytesIO(shape.image.blob)
                doc.add_picture(img_stream, width=Inches(4))

    # Write the assembled document to disk.
    doc.save(word_filename)