import docx
from docx.document import Document as _Document
from docx.oxml.text.paragraph import CT_P
from docx.oxml.table import CT_Tbl
from docx.table import _Cell, Table, _Row
from docx.text.paragraph import Paragraph

# Destination of the modified document; written only after a replacement.
OUTPUT_PATH = r'C:\Users\WURUI\Desktop\111.docx'

# Exact paragraph text -> text that replaces it.
REPLACEMENTS = {
    '111.638': '123456',
    '119.345': '6666666',
}

doc = docx.Document(r'C:\Users\WURUI\Desktop\001.docx')


def _replace_in_cell(cell: _Cell, current_row: int, current_col: int) -> int:
    """Print and patch every paragraph in *cell*, descending into nested tables.

    Args:
        cell: The table cell whose child elements are walked.
        current_row: Row index of *cell* inside its own table (printed only).
        current_col: Column index of *cell* inside its own table (printed only).

    Returns:
        The number of paragraphs whose text was replaced, including those
        found in nested tables.
    """
    replaced = 0
    for block in cell._element:
        if isinstance(block, CT_P):
            paragraph = Paragraph(block, cell)
            text = paragraph.text
            # cell.text is deliberately re-read for every paragraph: an
            # earlier replacement in the same cell changes what it returns.
            print(f'{cell.text}---文本:{text}---小行:{current_row}----小列:{current_col}')
            new_text = REPLACEMENTS.get(text)
            if new_text is not None:
                paragraph.text = new_text
                replaced += 1
        elif isinstance(block, CT_Tbl):
            nested = Table(block, cell)
            n_cols = len(nested.columns)
            for row in range(len(nested.rows)):
                for col in range(n_cols):
                    replaced += _replace_in_cell(nested.cell(row, col), row, col)
    return replaced


def table_nested_parsing(cell: _Cell, current_row: int, current_col: int) -> None:
    """Process one cell recursively and save the document if anything changed.

    Every paragraph in *cell* (and in tables nested inside it) is printed;
    paragraphs whose text is a key of ``REPLACEMENTS`` are overwritten.

    Args:
        cell: The table cell to process.
        current_row: Row index of *cell* inside its table (printed only).
        current_col: Column index of *cell* inside its table (printed only).

    Note:
        The save targets the module-level ``doc``, not a document passed in
        by the caller, and is written to ``OUTPUT_PATH``.
    """
    # One save per processed cell instead of one per matched paragraph: the
    # resulting file is the same, without rewriting it for every match.
    if _replace_in_cell(cell, current_row, current_col):
        doc.save(OUTPUT_PATH)


def doc_parsing(doc: _Document) -> None:
    """Walk every top-level table of *doc* and process each of its cells.

    Args:
        doc: The document whose body is scanned.
    """
    for doc_part in doc.element.body:
        # Only tables are handled; top-level paragraphs are skipped.
        if not isinstance(doc_part, CT_Tbl):
            continue
        table = Table(doc_part, doc)
        n_cols = len(table.columns)
        for row in range(len(table.rows)):
            for col in range(n_cols):
                # Original author's note: "row 34 has a problem, 11 columns".
                # NOTE(review): meaning unclear - possibly a row whose cell
                # count differs from the table grid; confirm with the input.
                print(f'大行:{row}-----大列:{col}')
                table_nested_parsing(table.cell(row, col), row, col)


doc_parsing(doc)
python解析测试(1)
最新推荐文章于 2024-07-28 15:46:11 发布