Word表格转换到Excel中_word转为excel java 保留格式和图片-CSDN博客

本文链接：https://blog.csdn.net/sinat_39654987/article/details/125153027

该博客介绍了如何在MAC系统中，使用Python的`python-docx`和`xlsxwriter`库来读取Word文档中的表格，并将其内容写入Excel文件，以便进行数据筛选操作。首先，通过`get_files`函数获取所有.docx文件，然后使用`read_docx`函数解析表格数据，最后利用`write_excel`函数将数据写入Excel文件。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

需求：在 Mac 上将 Word 文档中的表格解析后存入 Excel，方便进行数据筛选。

'''
Read data from docx file and write it to excel file.

requirements:
    pip3 install python-docx
    pip3 install xlsxwriter
mac不支持win32com
'''
import docx
import xlsxwriter
import os
import re

# get all docx files in the directory
def get_files(dir_name):
    """Return the names of the Word documents found directly in ``dir_name``.

    Args:
        dir_name: Path of the directory to scan (sub-directories are not
            searched).

    Returns:
        A sorted list of bare file names (not full paths) whose extension is
        ``.docx``, matched case-insensitively. Word lock files and
        directories are excluded.
    """
    files = []
    # os.listdir returns entries in arbitrary order; sort for stable output.
    for name in sorted(os.listdir(dir_name)):
        # Word keeps a "~$<name>.docx" lock file next to any open document;
        # it is not a real document and cannot be parsed.
        if name.startswith("~$"):
            continue
        if not name.lower().endswith(".docx"):
            continue
        # os.listdir also returns directories; keep regular files only.
        if os.path.isfile(os.path.join(dir_name, name)):
            files.append(name)
    return files

# read data from docx file
def read_docx(file_name):
    """Extract height, blood pressure and hobby from the first table of a .docx.

    The values are read from the second column of the first three rows of
    the first table in the document:

    * row 0 -- height: a number (decimals allowed) followed by ``cm``;
    * row 1 -- blood pressure written as ``<high>/<low>``, optionally
      followed by whitespace and further text, which is ignored;
    * row 2 -- hobby, taken verbatim.

    Args:
        file_name: Path of the Word document to read.

    Returns:
        A 4-tuple of strings ``(height, pressure_high, pressure_low, hobby)``.
        ``height`` keeps its ``cm`` unit. A value that cannot be found in its
        cell is returned as ``""`` so one incomplete document does not abort
        the whole run.

    Raises:
        IndexError: If the document contains no table. Errors raised by
            ``table.cell`` for a table that is too small are propagated
            unchanged.
    """
    doc = docx.Document(file_name)
    tables = doc.tables
    if not tables:
        raise IndexError("no table found in {!r}".format(file_name))
    table = tables[0]

    # Height: require at least one digit so a bare "cm" is not accepted, and
    # allow a decimal part so "175.5cm" is not truncated to "5cm".
    height_match = re.search(r"\d+(?:\.\d+)?\s*cm", table.cell(0, 1).text)
    height = height_match.group(0) if height_match else ""

    # Blood pressure: the first whitespace-separated token holds "high/low".
    tokens = table.cell(1, 1).text.split()
    reading = tokens[0] if tokens else ""
    parts = reading.split("/")
    pressure_high = parts[0]
    pressure_low = parts[1] if len(parts) > 1 else ""

    # Hobby: the cell text is used as-is.
    hobby = table.cell(2, 1).text
    return (height, pressure_high, pressure_low, hobby)

# write data to excel file
def write_excel(row, col, data, sheet=None):
    """Write the items of ``data`` into consecutive cells of one row.

    Args:
        row: Row index passed to ``sheet.write`` for every item.
        col: Column index of the first cell; item ``i`` is written to
            column ``col + i``.
        data: Iterable of cell values.
        sheet: Object exposing ``write(row, col, value)``. When omitted, the
            module-level ``worksheet`` created by the script entry point is
            used, which keeps existing three-argument calls working.
    """
    if sheet is None:
        # Fall back to the global so older call sites keep their behaviour.
        sheet = worksheet
    for offset, value in enumerate(data):
        sheet.write(row, col + offset, value)


if __name__ == '__main__':
    # Directory that holds the .docx files; the workbook is written there too.
    dir_name = "/Users/……/Desktop/word"
    col = 0
    # The context manager closes (and thereby saves) the workbook even when a
    # document fails to parse, so rows already written are not lost.
    with xlsxwriter.Workbook("/Users/……/Desktop/word/excel.xlsx") as workbook:
        # Must stay a module-level name: write_excel() looks it up globally.
        worksheet = workbook.add_worksheet()
        # One spreadsheet row per document, starting at row 0.
        for row, file in enumerate(get_files(dir_name)):
            data = read_docx(os.path.join(dir_name, file))
            print(data)
            write_excel(row, col, data)