批量提取(Word->Excel)会议通知书关键信息
源代码:
from docx import Document
from openpyxl import Workbook
import glob
# Batch-extract key fields from meeting-notice Word files into one Excel sheet.
#
# Every *.docx directly inside `path` becomes one spreadsheet row holding a
# running number, the meeting time, the place, the attendees and the numbered
# agenda items (joined with ';').

# Folder that holds the meeting-notice .docx files.
path = r'E:\0616\untitled\wxhs\Batch processing\会议通知'

workbook = Workbook()
sheet = workbook.active
header = ['序号','时间','地点','参加人员','内容']
sheet.append(header)

# Paragraph prefixes that introduce the single-value fields of a notice.
time_label = '时间:'
place_label = '地点:'
people_label = '参加人员:'

# glob does not guarantee any ordering, so sort the matches to keep the
# row numbering reproducible from run to run.
for number, file in enumerate(sorted(glob.glob(path+r'\*.docx')), start=1):
    # Reset the fields for every file: otherwise a notice that lacks one of
    # the labelled paragraphs would silently reuse the previous notice's
    # value (or raise NameError when it is the first file processed).
    mtime = mplace = mpeople = ''
    content_list = []
    meeting_file = Document(file)
    for paragraph in meeting_file.paragraphs:
        # Read the paragraph text once and reuse it for all checks.
        text = paragraph.text
        if text.startswith(time_label):
            mtime = text[len(time_label):]
        elif text.startswith(place_label):
            mplace = text[len(place_label):]
        elif text.startswith(people_label):
            mpeople = text[len(people_label):]
        elif len(text) >= 2 and text[0].isdigit() and text[1] == '.':
            # Numbered agenda item such as "1. ..."; only a single leading
            # digit followed by '.' is recognised, so items numbered 10 and
            # above are not collected.
            content_list.append(text)
    content = ';'.join(content_list)
    sheet.append([number, mtime, mplace, mpeople, content])

# Written relative to the current working directory.
workbook.save('./会议提取关键信息.xlsx')