#python批量处理会议记录word导入excel(word to excel)
import os
from docx import Document
import openpyxl
from openpyxl import load_workbook
def get_meet_content(filename):
    """Extract the key fields of one meeting-minutes .docx file as a sheet row.

    The first nine paragraphs are treated as the header and scanned for
    marker substrings (unit, time, meeting name, place, attendees).  Every
    paragraph from the tenth onward is concatenated into the meeting body.
    Surrounding whitespace and all spaces are removed from each paragraph
    before it is examined.

    Args:
        filename: Path of the .docx file, passed unchanged to ``Document``.

    Returns:
        An 11-item list: ``['', unit, time, '', meeting name, place,
        attendees, body, '', '', filename]``.  The empty strings are
        placeholders that line the values up with the worksheet cells.  A
        header field whose marker is not found is returned as ``''`` instead
        of raising ``KeyError``.
    """
    doc = Document(filename)
    paragraphs = doc.paragraphs

    fields = {}     # header elements found so far, keyed by column title
    attendees = []  # host / attendee / observer fragments, in document order

    for paragraph in paragraphs[:9]:
        # Clean a local copy; assigning back to paragraph.text would rewrite
        # the in-memory document for no benefit.
        text = paragraph.text.strip().replace(' ', '')

        if 'dw' in text:
            fields['单位'] = 'zddw'
        if '时间' in text:
            # Echo the raw time line to the console.
            print(text)
            fields['时间'] = text.replace('时间:', '')
        if '党委' in text:
            fields['会议名称'] = '党委会'
        if '院长办' in text:
            # Checked after the previous marker, so it wins if both occur.
            fields['会议名称'] = '院长办公会'
        if '地点:' in text:
            fields['会议地点'] = text.replace('地点:', '')

        # Host, attendees and observers all end up in one "participants"
        # cell, each with its own label stripped.
        for marker, label in (('主持人', '主持人:'),
                              ('出席:', '出席:'),
                              ('列席:', '列席:')):
            if marker in text:
                attendees.append(text.replace(label, ''))

    # The body keeps its single leading space.
    content = ' ' + ''.join(
        paragraph.text.strip().replace(' ', '') for paragraph in paragraphs[9:]
    )

    # Empty strings keep the values aligned with the worksheet cells.
    row = [
        '',
        fields.get('单位', ''),
        fields.get('时间', ''),
        '',
        fields.get('会议名称', ''),
        fields.get('会议地点', ''),
        ''.join(attendees),
        content,
        '',
        '',
        filename,
    ]
    return row
# Write one row into the Excel workbook.
def input_excel(list,
                excel_file='G:\\工作记录\\工作任务清单.xlsx',
                sheet_name='会议记录'):
    """Append one row to a worksheet and save the workbook in place.

    Args:
        list: Sequence of cell values for the new row.  The name shadows the
            builtin but is kept so existing callers continue to work.
        excel_file: Path of the workbook to load and overwrite.  Defaults to
            the task-list workbook.
        sheet_name: Title of the worksheet that receives the row.  Defaults
            to the meeting-records sheet.

    Raises:
        KeyError: If the workbook has no worksheet named ``sheet_name``.
    """
    wb = load_workbook(excel_file)
    ws = wb[sheet_name]
    ws.append(list)
    # Save over the same file so the new row is persisted.
    wb.save(excel_file)
if __name__ == '__main__':
    # Import every meeting-minutes .docx file in the folder into the workbook.
    path = 'G:\\会议纪录\\'
    # get_meet_content is given the bare file name (which it also stores in
    # the row), so the folder must be the working directory.
    os.chdir(path)
    i = 0  # number of files imported so far
    for filename in os.listdir(path):
        # Require a real ".docx" extension, and skip the "~$" owner files
        # Word creates next to an open document: they share the extension
        # but cannot be opened as documents.
        if not filename.endswith('.docx') or filename.startswith('~$'):
            continue
        print(filename)
        row = get_meet_content(filename)
        input_excel(row)
        i = i + 1
        print('******No.' + str(i) + ' file input over******')
    print('ok')