读研期间一直在做文档解析相关的工作,但仅局限于段落文本内容,没有涉及表格解析(如有疑问,可加微信:13161411563),
例如下图所示的嵌套表格:
方法一:使用 python-docx 进行解析:
import docx
from docx.document import Document as _Document
from docx.oxml.text.paragraph import CT_P
from docx.oxml.table import CT_Tbl
from docx.table import _Cell, Table, _Row
from docx.text.paragraph import Paragraph
# Load the sample Word file with python-docx; 'test.docx' is a relative path,
# so it is resolved against the current working directory.
doc = docx.Document('test.docx')
def table_nested_parsing(cell, current_row, current_col):
for block in cell._element:
if isinstance(block, CT_P):
print(Paragraph(block, cell).text)
if isinstance(block, CT_Tbl):
block = Table(block, cell)
for row in range(len(block.rows)):
for col in range(len(block.columns)):