【办公自动化】Python抓取PPT的值到excel-20220506

效果:有60个结构类似的PPT,每个PPT里面有5页,需抓取特定标题页面下特定位置的PPT内容。

思路:①遍历PPT文件;②定义ppt读取函数;③遍历PPT文件,读取目标值;④将值存放进excel

https://pythonbrief.blog.csdn.net/article/details/109089030

read_ppt.py

import imp
from pptx import Presentation
import os
import shutil
from openpyxl import load_workbook
from datetime import datetime

# Locations used by this script: the folder that is scanned for
# presentations, and the two workbooks loaded further down.
file_address = r'Total'
talent_excel = r"人員名單 .xlsx"
wrong_excel = r"读取记录.xlsx"

# Keep every directory entry whose name ends in .ppt or .pptx.
# Entries are bare file names (no directory part), in os.listdir order.
file_address_list = [
    entry
    for entry in os.listdir(file_address)
    if entry.endswith(('.ppt', '.pptx'))
]




# 定义ppt读取函数
def _clean_cell_text(text):
    """Return *text* with newline and vertical-tab characters removed."""
    return text.replace('\n', '').replace('\x0b', '')


def read_ppt_goal(filename):
    """Extract the employee id and the two goal texts from one presentation.

    The employee id is the second ``-``-separated field of the file's base
    name, so a directory component in *filename* does not affect it.

    A table is identified by the text of the most recent text-bearing shape
    that precedes it on the same slide. That remembered text is reset at the
    start of every slide, so a table is never matched against text from an
    earlier slide, and a table that appears before any text shape is simply
    skipped instead of raising ``UnboundLocalError``.

    Args:
        filename: Path of the presentation, passed to ``Presentation``.

    Returns:
        A tuple ``(employee_id, person_goal, team_goal)``. A goal is ``''``
        when no matching table was found; when several tables match, the
        last one wins.

    Raises:
        IndexError: If the base name of *filename* contains no ``-``, or if
            a matching table is smaller than the cell that is read from it.
        Exception: Whatever ``Presentation`` raises for a file it cannot
            open is propagated unchanged.
    """
    prs = Presentation(filename)
    person_goal = ''
    team_goal = ''

    employee_id = os.path.basename(filename).split('-')[1]

    # NOTE(review): both markers are the same literal, which looks like a
    # placeholder for the real slide titles - confirm before relying on it.
    person_marker = '一些字眼'
    team_marker = '一些字眼'
    # Text that excludes a slide from the personal-goal match.
    extension_marker = '延伸'

    for slide in prs.slides:
        # Text of the latest text shape seen on *this* slide.
        heading_text = ''
        for shape in slide.shapes:
            if shape.has_text_frame:
                heading_text = shape.text_frame.text

            # Only shapes that really hold a table are read; getattr keeps
            # this safe for shape types that do not define has_table.
            if not getattr(shape, 'has_table', False):
                continue

            if person_marker in heading_text and extension_marker not in heading_text:
                person_goal = _clean_cell_text(shape.table.cell(1, 1).text)

            if team_marker in heading_text:
                team_goal = _clean_cell_text(shape.table.cell(4, 4).text)

    return employee_id, person_goal, team_goal



# Open the log workbook: extracted values are written to 'sheet1',
# files that could not be processed are listed in 'sheet2'.
wb_ppt = load_workbook(wrong_excel)
sheet_ppt = wb_ppt['sheet1']
sheet_wrong = wb_ppt['sheet2']

# Read every presentation and collect [employee_id, person_goal, team_goal].
talent_data = []
print(len(file_address_list))
i = 1  # last row used in sheet_ppt; writing starts at row 2
j = 1  # last row used in sheet_wrong; writing starts at row 2
for filename in file_address_list:
    try:
        # file_address_list holds bare names, so the file has to be opened
        # relative to the folder that was listed, not the working directory.
        employee_id, person_goal, team_goal = read_ppt_goal(
            os.path.join(file_address, filename))
        # Advance the row only after a successful read so that failed files
        # do not leave blank rows in sheet_ppt.
        i = i + 1
        talent_data.append([employee_id, person_goal, team_goal])
        print(employee_id, person_goal, team_goal)
        # Store the values in sheet_ppt.
        sheet_ppt.cell(row=i, column=1).value = employee_id
        sheet_ppt.cell(row=i, column=2).value = person_goal
        sheet_ppt.cell(row=i, column=3).value = team_goal
        # Saved after every file so progress survives an interrupted run.
        wb_ppt.save(wrong_excel)

    except Exception as exc:
        # Best-effort batch: keep going, but record why the file failed
        # instead of discarding the exception.
        j = j + 1
        print(filename, "读取失败", repr(exc))
        sheet_wrong.cell(row=j, column=1).value = filename + "读取失败"
        # Only the exception class name goes into the sheet; the full
        # message may contain characters a worksheet cell rejects.
        sheet_wrong.cell(row=j, column=2).value = type(exc).__name__
        wb_ppt.save(wrong_excel)

print(talent_data)
print('len of talent data', len(talent_data))

# Copy the extracted goals into the roster workbook.
# Column positions in the active sheet of talent_excel.
EMPLOYEE_ID_COL = 5
PERSON_FLAG_COL = 19
PERSON_GOAL_COL = 20
TEAM_FLAG_COL = 22
TEAM_GOAL_COL = 23

talent_wb = load_workbook(talent_excel)
talent_sheet = talent_wb.active
max_row = talent_sheet.max_row

# Index the extracted records by employee id (compared as strings) so each
# sheet row needs one lookup. Later records replace earlier ones with the
# same id, which matches the result of scanning the list in order.
goals_by_id = {str(record[0]): record for record in talent_data}

# Row 1 is skipped; max_row is the last used row, so it must be included.
for i in range(2, max_row + 1):
    row_id = str(talent_sheet.cell(row=i, column=EMPLOYEE_ID_COL).value)
    record = goals_by_id.get(row_id)
    if record is None:
        continue

    person_goal = str(record[1])
    team_goal = str(record[2])

    # The flag column next to each goal is 'Y' for non-empty text, else 'N'.
    talent_sheet.cell(row=i, column=PERSON_GOAL_COL).value = person_goal
    talent_sheet.cell(row=i, column=PERSON_FLAG_COL).value = 'Y' if person_goal else 'N'

    talent_sheet.cell(row=i, column=TEAM_GOAL_COL).value = team_goal
    talent_sheet.cell(row=i, column=TEAM_FLAG_COL).value = 'Y' if team_goal else 'N'

talent_wb.save(talent_excel)



  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值