#coding:utf-8
import os
import re

import docx
import pandas as pd
from docx import Document
from docx.document import Document as dc
from docx.oxml.ns import qn  # used to set the Chinese (East Asian) font
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
from docx.shared import Pt  # used to set the font (point sizes)
from docx.shared import RGBColor  # used to set the font color
from docx.table import _Cell, Table
from docx.text.paragraph import Paragraph
# Path of the .docx file to process (the "xxxx" segments look like a
# placeholder -- replace with a real path before running).
FILE_PATH = r"D:\xxxx\xxxx\xxxx\xxxx.docx"

# NOTE: this call runs at import time, i.e. as soon as the module is loaded.
obj = docx.Document(FILE_PATH)


def iter_block_items(parent):
    """Yield the paragraphs and tables that are direct children of *parent*.

    Children are visited in the order ``iterchildren()`` returns them.  A
    ``CT_P`` child is wrapped as ``Paragraph(child, parent)`` and a ``CT_Tbl``
    child as ``Table(child, parent)``; children of any other type are skipped.

    Args:
        parent: Either a ``docx.document.Document`` (its ``element.body`` is
            walked) or a table ``_Cell`` (its ``_tc`` element is walked).

    Yields:
        ``Paragraph`` and ``Table`` objects, interleaved as they occur.

    Raises:
        ValueError: If *parent* is neither a document nor a cell.  Because
            this function is a generator, the error surfaces when iteration
            starts, not when the function is called.
    """
    if isinstance(parent, dc):
        parent_elm = parent.element.body
    elif isinstance(parent, _Cell):
        parent_elm = parent._tc
    else:
        # Exception type and message text are kept verbatim (including the
        # "[TypeError]" tag) so existing callers that catch/match it still work.
        raise ValueError("[TypeError] Document in insuitable type.")

    for child in parent_elm.iterchildren():
        if isinstance(child, CT_P):
            yield Paragraph(child, parent)
        elif isinstance(child, CT_Tbl):
            yield Table(child, parent)


# NOTE(review): the original text breaks off at this point in the middle of
# the next definition -- "def table2list(table" -- whose remaining signature
# and body are not available here.  Restore it from the complete source.