# 记录学习过程
# 因为遇到纵向合并表格问题,加上一些情况要用到 win32com,所以使用以下思路:
# 读取表格的单元格,将相应的行的单元格进行拼接。如有更好的方法,欢迎交流~
import os

import win32com.client as win32
def read_word_tables_with_win32(file_path):
    """Extract the text of every table in a Word document, row by row.

    The document is opened through Word's COM automation interface. Cells
    are read from ``table.Range.Cells`` and grouped by their ``RowIndex``
    instead of walking ``table.Rows``; per the author's note this is done
    to cope with vertically merged cells.

    Args:
        file_path: Path to the document. Relative paths are resolved
            against Python's current working directory before being handed
            to Word, which runs in its own process with its own notion of
            the current directory.

    Returns:
        A list with one entry per table. Each entry is a list of strings,
        one per row (ordered by row index), where the row's cell texts are
        joined with a single space.

    Raises:
        Any COM error raised while starting Word, opening the document or
        reading its tables propagates to the caller; the document and the
        Word instance are still closed first.
    """
    absolute_path = os.path.abspath(file_path)
    word = win32.Dispatch('Word.Application')
    try:
        doc = word.Documents.Open(absolute_path)
        try:
            tables_content = []
            for table in doc.Tables:
                rows_content = {}
                for cell in table.Range.Cells:
                    # Drop the paragraph mark and the end-of-cell marker
                    # BEFORE stripping: '\x07' is not whitespace, so
                    # stripping first would leave trailing blanks behind.
                    cell_text = (
                        cell.Range.Text
                        .replace('\r', '')
                        .replace('\x07', '')
                        .strip()
                    )
                    rows_content.setdefault(cell.RowIndex, []).append(cell_text)
                # Join each row's cell contents into a single string.
                tables_content.append(
                    [' '.join(rows_content[row]) for row in sorted(rows_content)]
                )
            return tables_content
        finally:
            # False == wdDoNotSaveChanges: the document is only read, so
            # never let Word block on a "save changes?" prompt.
            doc.Close(False)
    finally:
        # Always shut Word down, otherwise a failed run leaves an orphaned
        # WINWORD.EXE process behind.
        word.Quit()
# Demo run: read every table from the sample document and print each one
# under a numbered heading.
tables = read_word_tables_with_win32(r"你的文档.docx")
for table_number, table_rows in enumerate(tables):
    heading = f'Table {table_number}:\n'
    print(heading, table_rows)