一、第三方库python-docx
1.1 python-docx介绍
python-docx is a Python library for creating and updating Microsoft Word (.docx) files.
1.2 python-docx安装
使用pip命令安装python-docx
pip install python-docx
1.3 快速开始
文档内容
获取段落
from docx import Document

# Open an existing Word file by path.
document = Document('C:/Users/webbe/Desktop/word.docx')

# List of paragraph objects
paragraphs = document.paragraphs
print(paragraphs)

for paragraph in paragraphs:
    # Print the text content of each paragraph.
    print(paragraph.text)
[<docx.text.paragraph.Paragraph object at 0x0000016EF1450A30>, <docx.text.paragraph.Paragraph object at 0x0000016EF1450970>, <docx.text.paragraph.Paragraph object at 0x0000016EF1450C10>, <docx.text.paragraph.Paragraph object at 0x0000016EF1450A60>, <docx.text.paragraph.Paragraph object at 0x0000016EF14509D0>]
这是一级标题
这是二级标题
这是文本 这是文本 这是文本 这是文本 这是文本 这是文本
获取块：块（run）是指同一段落中格式相同的一段连续文字，一个段落会按格式的不同被划分为若干个块
# Get the runs of a paragraph
from docx import Document

document = Document('C:/Users/webbe/Desktop/word.docx')

# List of paragraph objects
paragraphs = document.paragraphs

# Index 2 selects the third paragraph in the list.
third_paragraph = paragraphs[2]

# Print the text of each run of that paragraph, one run per line.
for run in third_paragraph.runs:
    print(run.text)
这是文本
这是文本
这是文本
这是文本这是文本这是文本
获取一级标题
# Print every level-1 heading.
# `paragraphs` is the paragraph list read from document.paragraphs earlier.
for paragraph in paragraphs:
    # A paragraph is treated as a level-1 heading when its style is named 'Heading 1'.
    if paragraph.style.name == 'Heading 1':
        print(paragraph.text)
# Output: 这是一级标题
获取二级标题
# Print every level-2 heading.
# `paragraphs` is the paragraph list read from document.paragraphs earlier.
for paragraph in paragraphs:
    # A paragraph is treated as a level-2 heading when its style is named 'Heading 2'.
    if paragraph.style.name == 'Heading 2':
        print(paragraph.text)
# Output: 这是二级标题
获取正文
# Print the body text.
# `paragraphs` is the paragraph list read from document.paragraphs earlier.
for paragraph in paragraphs:
    # Body text is selected as the paragraphs whose style is named 'Normal'.
    if paragraph.style.name == 'Normal':
        print(paragraph.text)
添加一级标题
from docx import Document

# The file is opened and then saved back to the same path.
docx_path = 'C:/Users/webbe/Desktop/word.docx'
document = Document(docx_path)

# First argument is the heading text, second is the heading level.
document.add_heading('这是一级标题', 1)

document.save(docx_path)
添加段落
from docx import Document

# The file is opened and then saved back to the same path.
docx_path = 'C:/Users/webbe/Desktop/word.docx'
document = Document(docx_path)

# Add a paragraph with the given text; the result is kept in a variable.
new_paragraph = document.add_paragraph('A plain paragraph having some ')

document.save(docx_path)
添加分页符
from docx import Document

# The file is opened and then saved back to the same path.
docx_path = 'C:/Users/webbe/Desktop/word.docx'
document = Document(docx_path)

# First add the page break ...
document.add_page_break()
# ... then add the paragraph.
new_paragraph = document.add_paragraph('Lorem ipsum dolor sit amet.')

document.save(docx_path)
添加块
from docx import Document

# The file is opened and then saved back to the same path.
docx_path = 'C:/Users/webbe/Desktop/word.docx'
document = Document(docx_path)

paragraph = document.add_paragraph('Lorem ipsum dolor sit amet.')

# Append three runs to the paragraph: one bold, one unformatted, one italic.
bold_run = paragraph.add_run('加粗')
bold_run.bold = True
paragraph.add_run('普通')
italic_run = paragraph.add_run('斜体')
italic_run.italic = True

document.save(docx_path)