Python处理word文件
高版本word可以转低版本word，反之不行。
- doc(通过win32接口,转换成docx进行操作)
- docx(Python-docx)
读docx(读word里的表格):
# Read a .docx file, locate the columns of interest from the header row of
# the first table, then append one record per data row of every table to
# `sum`.
#
# Names expected from the surrounding code (not defined in this snippet):
#   docx  - the python-docx package
#   path  - directory containing the document
#   name  - file name of the document
#   sum   - result container; `.append` is called on it, so presumably a
#           list (NOTE(review): it shadows the builtin `sum`)
data = docx.Document(os.path.join(path, name))
ta = data.tables
table = ta[0].rows[0].cells  # cells of the header row of the first table
s = [cell.text for cell in table]

# Header texts looked for: district, trainee / applicant name, company name.
# Start from None so that a missing header is detected explicitly instead of
# raising NameError later (or silently reusing a value from a previous run).
chengqu = shenbaoduixiang = mingxheng = None
for index, header in enumerate(s):
    print(index, header)
    if '城区' in header:
        chengqu = index
    if '对象' in header or '申报人' in header:
        shenbaoduixiang = index
    if '名称' in header:
        mingxheng = index

if chengqu is None or shenbaoduixiang is None or mingxheng is None:
    raise ValueError(
        "required header column not found in first table: {!r}".format(s)
    )

# Then collect the data rows (row 0 of each table is treated as the header).
for tbl in ta:
    rows = tbl.rows
    print(len(rows))
    for j in range(1, len(rows)):
        # Read from the table being iterated, not always from ta[0];
        # assumes every table shares the first table's column layout.
        # The person's name goes straight into the record so that the
        # file-name variable `name` is not overwritten.
        sum.append({
            'name': tbl.cell(j, shenbaoduixiang).text,
            'company': tbl.cell(j, mingxheng).text,
            'area': tbl.cell(j, chengqu).text,
            'city': '杭州',
        })
win32(doc -> docx)
# Convert a legacy .doc file to .docx through the Word COM interface, then
# delete the original .doc.
#
# Names expected from the surrounding code (not defined in this snippet):
#   wc    - presumably `win32com.client` (TODO confirm the import alias)
#   name  - file name of the .doc inside the 'outstanding_talent' folder
#
# The COM interface needs absolute paths, so everything is resolved against
# the current working directory. The previous `.split('.')[0]` on the
# absolute path cut it off at the first dot in any directory name.
abspath = os.path.abspath('.')
full = os.path.join(abspath, 'outstanding_talent')

src_path = os.path.join(full, name)
# splitext strips only the final extension, so "a.b.doc" becomes "a.b.docx".
new_name = os.path.splitext(name)[0] + '.docx'
dst_path = os.path.join(full, new_name)

word = wc.Dispatch("Word.Application")
try:
    doc = word.Documents.Open(src_path)
    try:
        # 12 is wdFormatXMLDocument, i.e. the .docx format.
        doc.SaveAs(dst_path, 12)
    finally:
        doc.Close()
finally:
    # Always shut Word down, even when opening or saving failed; otherwise
    # a Word process is left running.
    word.Quit()

# Reached only after a successful conversion. Skip the delete when the
# input already had the .docx name, or the fresh output would be removed.
if os.path.normcase(src_path) != os.path.normcase(dst_path):
    os.remove(src_path)