文件内容如图:
import os

from docx import Document
from win32com import client as wc
def word_convert(source_file='C:/名单.doc', new_file_name='C:/名单2.docx',
                 column_index=1):
    """Convert a legacy .doc file to .docx and collect one table column.

    If ``new_file_name`` does not exist yet, Microsoft Word is driven through
    COM to open ``source_file`` and save it as a .docx file. The .docx file is
    then opened with python-docx, and for every table in it the text of the
    cell at ``column_index`` is collected from each row except the first.

    Args:
        source_file: Path of the .doc file to convert.
        new_file_name: Path of the .docx file to create and then read.
        column_index: Zero-based, non-negative index of the column to collect
            (the default 1 selects the second column).

    Returns:
        A list with the text of the selected cell of every non-first row,
        in document order across all tables.
    """
    # The conversion is skipped when the .docx file is already on disk.
    if not os.path.exists(new_file_name):
        word = wc.Dispatch('Word.Application')
        try:
            doc = word.Documents.Open(source_file)
            try:
                # 12 is Word's wdFormatXMLDocument (.docx) save format.
                doc.SaveAs(new_file_name, 12, False, "", True, "",
                           False, False, False, False)
            finally:
                doc.Close()
        finally:
            # Always shut Word down, otherwise a failed conversion leaves a
            # background Word process running.
            word.Quit()

    document = Document(new_file_name)
    account_list = []
    for table in document.tables:
        for row_number, row in enumerate(table.rows):
            # The first row of each table is skipped.
            if row_number == 0:
                continue
            cells = row.cells
            # Rows that are too short to contain the requested column are
            # ignored instead of raising IndexError.
            if len(cells) > column_index:
                account_list.append(cells[column_index].text)
    return account_list
先把 doc 文件转换为 docx 文件,再读取其中的表格内容。