1、word文档操作
1.1 python-docx操作word
1.1.1 文档的创建、打开和保存
| 一级 | 二级 |
|---|---|
| Document(path):创建或打开文档 | save(path):保存文档 |
1.1.2 内容的获取
| 一级 | 二级 | 三级 | 四级 | 五级 | 六级 |
|---|---|---|---|---|---|
| Document(path):创建或打开文档 | paragraphs[index]:文档段落对象 | text:文本内容 | | | |
| | tables[index]:表格对象 | rows[index]:行对象 | cells[index]:单元格对象 | text:文本内容 | |
1.1.3 顺序获取(代码)
import os
import docx
from docx.document import Document
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
from docx.table import _Cell, Table
from docx.text.paragraph import Paragraph
from mailmerge import MailMerge
def iter_block_items(parent):
    """Yield each paragraph and table child of *parent*, in document order.

    Args:
        parent: Either a ``Document`` (its ``element.body`` is walked) or a
            table ``_Cell`` (its ``_tc`` element is walked); a cell can
            itself contain paragraphs and tables.

    Yields:
        A ``Paragraph`` for every ``CT_P`` child and a ``Table`` for every
        ``CT_Tbl`` child. Children of any other element type are skipped.

    Raises:
        ValueError: If *parent* is neither a ``Document`` nor a ``_Cell``.
            Because this is a generator, the error surfaces when iteration
            starts, not when the function is called.
    """
    # Pick the XML element whose direct children are the block-level items.
    if isinstance(parent, Document):
        parent_elm = parent.element.body
    elif isinstance(parent, _Cell):
        parent_elm = parent._tc
    else:
        raise ValueError("something's not right")

    # Wrap each raw XML child in its high-level proxy object, keeping the
    # order in which the children appear in the document.
    for child in parent_elm.iterchildren():
        if isinstance(child, CT_P):
            yield Paragraph(child, parent)
        elif isinstance(child, CT_Tbl):
            yield Table(child, parent)
def read_table(table):
    """Return the text of every cell of *table* as a list of rows.

    Args:
        table: An object exposing ``rows``; each row exposes ``cells`` and
            each cell exposes ``text``.

    Returns:
        A list with one inner list per row, holding the ``text`` of each
        cell in that row, in order. An empty table yields ``[]``.
    """
    return [[cell.text for cell in row.cells] for row in table.rows]
def read_word(word_path):
    """Read a .docx file and return its paragraphs and tables in order.

    NOTE(review): the original snippet ended right after opening the
    document and discarded it. The traversal below is the assumed intent,
    based on the iter_block_items and read_table helpers defined alongside
    this function -- confirm before relying on the return shape.

    Args:
        word_path: Path of the .docx file, passed to ``docx.Document``.

    Returns:
        A list in document order. A paragraph contributes its ``text``
        string; a table contributes the list of rows produced by
        ``read_table``.
    """
    doc = docx.Document(word_path)
    contents = []
    for block in iter_block_items(doc):
        if isinstance(block, Paragraph):
            contents.append(block.text)
        elif isinstance(block, Table):
            contents.append(read_table(block))
    return contents
1.2 docxtpl操作word模板
1.2.1 word文档中的前处理
将需要替换的内容用 {{变量名}} 表示,渲染前:
1.2.2 变量渲染(代码)
from docxtpl import DocxTemplate,InlineImage
# 引入尺寸参数
from docx.shared import Mm, Inches, Pt
# Load the Word template that contains the placeholders.
tpl = DocxTemplate('模板.docx')
# Wrap the picture as an inline image bound to this template, 145 mm wide.
# (Plain string: the original f-string had no placeholders.)
img_1 = InlineImage(tpl, image_descriptor='../files/01.png', width=Mm(145))
# Values handed to the template when rendering, keyed by variable name.
context = {
    'title_1': "标题",
    'text_1': "文字1",
    'text_2': "文字2",
    'list_1': ["遍历01", "遍历02", "遍历03"],
    'if_var': True,
    "img_1": img_1,
}
# Render the template with the values above.
tpl.render(context)
# Save the rendered document.
tpl.save('生成结果.docx')
渲染后:
1.3 word文档合并
1.3.1 方法一:pywin32库方式(代码)
from os.path import abspath
from win32com import client
def main(files, final_docx):
# 启动word应用程序
word = client.gencache.EnsureDispatch("Word.Application")
word.Visible = True
# 新建空白文档
new_document = word.Documents.Add()
for fn in files:
# 打开要合并的每个文件,复制其中的内容到剪切板,然后关闭文件
fn = abspath(fn)
temp_document = word.Documents.Open(fn)
# 选择整个文件内容
word.Selection.WholeStory()
# 复制内容
word.Selection.Copy()
temp_document.Close()