# Requirement (macOS): parse the tables in Word documents and store the results in Excel so the data is easy to filter.
'''
Read data from docx files and write it to an Excel file.
Requirements:
pip3 install python-docx
pip3 install xlsxwriter
Note: win32com is not supported on macOS.
'''
import docx
import xlsxwriter
import os
import re
# get all docx files in the directory
def get_files(dir_name):
    """Return the names of the Word documents directly inside ``dir_name``.

    Args:
        dir_name: Path of the directory to scan; sub-directories are not
            searched.

    Returns:
        A sorted list of bare file names (without the directory prefix) that
        end in ``.docx``. Names starting with ``~$`` are left out.

    Raises:
        OSError: Propagated from ``os.listdir`` when the directory cannot be
            read.
    """
    # Word keeps a hidden "~$<name>.docx" owner file next to a document while
    # it is open; it also ends in ".docx" but is not a parseable document.
    # Sorting makes the result independent of the order in which the file
    # system happens to list the directory entries.
    return sorted(
        name
        for name in os.listdir(dir_name)
        if name.endswith(".docx") and not name.startswith("~$")
    )
# read data from docx file
def read_docx(file_name):
    """Extract height, blood pressure and hobby from a document's first table.

    The three values are read from column 1 of rows 0, 1 and 2 of the first
    table in the document.

    Args:
        file_name: Path of the ``.docx`` file to parse.

    Returns:
        A 4-tuple of strings
        ``(height, bld_presure_high, bld_presure_low, fav)``: the height
        including its ``cm`` suffix, the two halves of a ``high/low`` blood
        pressure reading, and the unmodified hobby text.

    Raises:
        ValueError: If the document has no table, if the height cell holds no
            ``<number>cm`` value, or if the blood pressure cell does not
            start with a ``high/low`` reading.
    """
    doc = docx.Document(file_name)
    tables = doc.tables
    if not tables:
        raise ValueError("{}: document contains no table".format(file_name))
    table = tables[0]  # only the first table of the document is used

    # Category 1 - height. At least one digit is required so that a bare "cm"
    # is never taken for a height, and an optional decimal part keeps a value
    # such as "175.5cm" in one piece.
    height_text = table.cell(0, 1).text
    height_match = re.search(r"\d+(?:\.\d+)?cm", height_text)
    if height_match is None:
        raise ValueError(
            "{}: no height like '175cm' in {!r}".format(file_name, height_text)
        )
    height = height_match.group(0)

    # Category 2 - blood pressure. The reading is the first whitespace
    # separated token; split() without arguments ignores leading and repeated
    # whitespace, so a cell that starts with a space still parses.
    bld_text = table.cell(1, 1).text
    tokens = bld_text.split()
    parts = tokens[0].split('/') if tokens else []
    if len(parts) < 2:
        raise ValueError(
            "{}: no blood pressure like '120/80' in {!r}".format(
                file_name, bld_text
            )
        )
    bld_presure_high = parts[0]
    bld_presure_low = parts[1]

    # Category 3 - hobby. The cell text is used as it is.
    fav = table.cell(2, 1).text
    return (height, bld_presure_high, bld_presure_low, fav)
# write data to excel file
def write_excel(row, col, data, sheet=None):
    """Write the items of ``data`` into consecutive cells of a single row.

    Args:
        row: Row index passed to the sheet's ``write`` method.
        col: Column index of the first cell; item ``i`` of ``data`` is
            written to column ``col + i``.
        data: Iterable of cell values.
        sheet: Optional object providing ``write(row, col, value)``. When it
            is omitted, the module-level ``worksheet`` is used, which only
            exists once the script's main section has created it.
    """
    # Falling back to the global keeps existing three-argument calls working,
    # while an explicit sheet lets the function be used without that global.
    target = worksheet if sheet is None else sheet
    for offset, value in enumerate(data):
        target.write(row, col + offset, value)
if __name__ == '__main__':
    # Directory holding the .docx inputs; excel.xlsx is created in the same
    # directory.
    dir_name = "/Users/……/Desktop/word"
    col = 0
    # The context manager closes the workbook when the block is left, also
    # when a document fails to parse, so the output file handle and
    # xlsxwriter's temporary files are always released.
    with xlsxwriter.Workbook(os.path.join(dir_name, "excel.xlsx")) as workbook:
        # write_excel() looks this name up as a module-level global, so it
        # has to be assigned here at module scope.
        worksheet = workbook.add_worksheet()
        # One spreadsheet row per document, starting at row 0.
        for row, file in enumerate(get_files(dir_name)):
            data = read_docx(os.path.join(dir_name, file))
            print(data)
            write_excel(row, col, data)


