python怎么获取word文档的章节_python读取word文档

安装python的docx支持:python -m pip install python-docx

# -*- coding: utf-8 -*-

import docx

from docx import Document

# Word document to scan; the "./" prefix makes it relative to the current
# working directory.
path = "./file2.docx"

# Opened once at import time; readlist() and readtable() both read this
# module-level object.
document = Document(path)

def readlist():
    """Scan the document's paragraphs for table headings and process each one.

    A paragraph whose text starts with u'表名' is treated as a table heading.
    For every heading found:

    * the paragraph directly before it (since the previous heading) is taken
      as the descriptive name, or u'' when there is no such paragraph;
    * the text after the last ':' in the heading is taken as the table name
      (the whole heading when it contains no ':');
    * both are printed on one line separated by a space;
    * readtable() is called with a running counter, so the n-th heading is
      paired with index n - 1 of the document's tables.

    Returns:
        None.
    """
    table_count = 0
    # Text of the paragraph seen just before the current one, or None when no
    # paragraph has been seen since the start or since the last heading.
    previous_text = None

    for paragraph in document.paragraphs:
        r_text = paragraph.text

        if not r_text.startswith(u'表名'):
            previous_text = r_text
            continue

        # The earlier list-based lookup used index -1 when the heading had no
        # predecessor, which wrapped around to the heading itself.  Fall back
        # to an empty name instead.
        tablenamec = previous_text if previous_text is not None else u''
        tablename = r_text.rpartition(u':')[2]

        # A single pre-formatted argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print(u'{0} {1}'.format(tablenamec, tablename))

        readtable(table_count)
        table_count += 1
        previous_text = None

def readtable(table_count):
    """Build one INSERT statement per data row of a table in the document.

    The first row is skipped as a header.  For each later row the statement
    carries the row's position in the table, the text of its second and third
    cells, and a remark taken from the fifth cell when the header row has more
    than four cells, otherwise from the fourth cell.

    Args:
        table_count: Zero-based index into ``document.tables``.

    Returns:
        A list of SQL strings in row order; empty when the table has at most
        one row.  Earlier versions built each string and discarded it, so
        callers that ignore the return value are unaffected.
    """
    # NOTE(review): an out-of-range table_count presumably raises IndexError
    # here -- confirm against the type returned by document.tables.
    table = document.tables[table_count]
    rows = table.rows
    statements = []

    if len(rows) <= 1:
        return statements

    # The remark column position is decided once, from the header row width.
    remark_index = 4 if len(rows[0].cells) > 4 else 3

    for row_num, row in enumerate(rows):
        if row_num == 0:
            continue

        # Read the cell sequence once per row instead of once per column.
        cells = row.cells
        colname = cells[1].text
        colnamec = cells[2].text
        bz = cells[remark_index].text

        # NOTE(review): cell text is interpolated verbatim; a value that
        # contains a single quote produces a malformed statement.  Escape or
        # parameterize before executing these against a database.
        sql = u"insert into tablename (a,b,c,d) values('{0}','{1}','{2}','{3}')".format(row_num, colname, colnamec, bz)
        statements.append(sql)

    return statements

#

# for table in document.tables:

# for row in table.rows:

# print row.cells[2].text

if __name__ == "__main__":
    # Announce the run, then scan the document.  The parenthesized
    # single-argument print works under both Python 2 and Python 3.
    print("start")

    readlist()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值