效果:有60个结构类似的PPT,每个PPT里面有5页,需抓取特定标题页面下特定位置的PPT内容。
思路:①遍历PPT文件;②定义ppt读取函数;③遍历PPT文件,读取目标值;④# 将值存放进去excel
https://pythonbrief.blog.csdn.net/article/details/109089030
read_ppt.py
import imp
from pptx import Presentation
import os
import shutil
from openpyxl import load_workbook
from datetime import datetime
### 遍历文件
file_address = r'Total'
talent_excel= r"人員名單 .xlsx"
wrong_excel= r"读取记录.xlsx"
file_address_list = []
for filename in os.listdir(file_address):
if filename.endswith('.ppt'):
file_address_list.append(filename)
elif filename.endswith('.pptx'):
file_address_list.append(filename)
#print('file_address_list',file_address_list)
# 定义ppt读取函数
def read_ppt_goal(filename):
prs = Presentation(filename)
person_goal= ''
team_goal= ''
employee_id = filename.split('-')[1]
#print('employee_id',employee_id)
# 获取Slide
for slide in prs.slides:
#print(slide)
#3)获取Shape形状
for shape in slide.shapes:
#print(shape)
#print('type of shape',type(shape))
#判断每个Shape中是否存在文字
if shape.has_text_frame:
text_frame = shape.text_frame
#print(text_frame.text)
# 找出表格里面的值
if 'GraphicFrame' in str(type(shape)) and ('一些字眼' in text_frame.text and '延伸' not in text_frame.text):
#print('ok')
table = shape.table
#print(table)
person_goal= table.cell(1, 1).text.replace('\n', '').replace('\x0b', '')
#print('person_goal',person_goal)
# 找出表格里面的值
#if 'GraphicFrame' in str(type(shape)) and '任務2 : 團體目標9宮格延伸' in text_frame.text:
#if 'GraphicFrame' in str(type(shape)) and ('任務2 : 團體目標9宮格' in text_frame.text and '延伸' not in text_frame.text):
if 'GraphicFrame' in str(type(shape)) and ('一些字眼' in text_frame.text):
#print('ok')
table = shape.table
#print(table)
team_goal= table.cell(4, 4).text.replace('\n', '').replace('\x0b', '')
#print('team_goal',team_goal)
# print('person_goal',person_goal)
# print('team_goal',team_goal)
return employee_id,person_goal,team_goal
wb_ppt = load_workbook(wrong_excel)
sheet_ppt = wb_ppt['sheet1']
sheet_wrong = wb_ppt['sheet2']
# 遍历文件,读取目标值
talent_data = []
print(len(file_address_list))
i = 1
j = 1
for filename in file_address_list:
try:
i = i + 1
employee_id,person_goal,team_goal = read_ppt_goal(filename)
talent_data.append([employee_id,person_goal,team_goal])
print(employee_id,person_goal,team_goal)
#保存在sheet_ppt里面
sheet_ppt.cell(row=i, column=1).value = employee_id
sheet_ppt.cell(row=i, column=2).value = person_goal
sheet_ppt.cell(row=i, column=3).value = team_goal
wb_ppt.save(wrong_excel)
except Exception:
j = j + 1
print(filename,"读取失败")
sheet_wrong.cell(row=j, column=1).value = filename + "读取失败"
wb_ppt.save(wrong_excel)
pass
print(talent_data)
print('len of talent data',len(talent_data))
# 将值存放进去excel
talent_wb = load_workbook(talent_excel)
talent_sheet = talent_wb.active
max_row = talent_sheet.max_row
for i in range(2,max_row):
for listdata in talent_data:
print(i,listdata)
if str(talent_sheet.cell(row=i, column=5).value) == str(listdata[0]):
talent_sheet.cell(row=i, column=20).value = str(listdata[1])
if talent_sheet.cell(row=i, column=20).value:
talent_sheet.cell(row=i, column=19).value = 'Y'
else:
talent_sheet.cell(row=i, column=19).value = 'N'
talent_sheet.cell(row=i, column=23).value = str(listdata[2])
if talent_sheet.cell(row=i, column=23).value:
talent_sheet.cell(row=i, column=22).value = 'Y'
else:
talent_sheet.cell(row=i, column=22).value = 'N'
talent_wb.save(talent_excel)