需要先安装第三方库 python-docx(读取 Word 文档)和 openpyxl(写入 Excel 文件):
python -m pip install python-docx openpyxl
一个简单的样例:
背景:每个段落中有若干个表格,需要提取部分段落中的所有表格信息,并输出到 Excel 文件中。
import os
import re
from docx import Document
import openpyxl as op
class DocxReader:
def __init__(self, srcfile, dstfile):
self.srcfile = srcfile
self.dstfile = dstfile
self.docx = None
self.get_document()
def get_document(self):
    """Load ``self.srcfile`` into ``self.docx`` when the path exists.

    If the path does not exist, ``self.docx`` is left untouched, so it
    keeps whatever value it had before the call.
    """
    if not os.path.exists(self.srcfile):
        return
    self.docx = Document(self.srcfile)
def get_all_interested_info(self):
if not self.docx:
return
start_flag = False
patch_version = None
tid = 0
tables = self.docx.tables
table_size = len(tables)
data = [["设备形态", "补丁号", "问题序号", "问题单号", "问题现象", "问题影响", "严重级别"]]
interest =