一、安装python-docx模块
pip install python-docx
二、读取word文件
1.word文档结构
Document: 文档
Paragraph:段落
Run:文字块
2.提取文字
2.1提取段落实例,段数:
.paragraphs 获取一个列表,包含每个段落的实例
from docx import Document

# Open the existing Word file.
doc = Document("0.docx")
# doc.paragraphs is a list holding one Paragraph instance per paragraph;
# bind it once and report both the instances and how many there are.
all_paragraphs = doc.paragraphs
print(all_paragraphs)
print(len(all_paragraphs))
结果:
[<docx.text.paragraph.Paragraph object at 0x000001F88E2F2E80>, <docx.text.paragraph.Paragraph object at 0x000001F88E2F2C88>, <docx.text.paragraph.Paragraph object at 0x000001F88E2F2EF0>]
3
结果说明有三段
2.2提取段落内容
from docx import Document

doc = Document("0.docx")
# Print the text of every paragraph, one paragraph per output line.
for paragraph in doc.paragraphs:
    print(paragraph.text)
以上便是word与python结合的第二部分内容,后续将会持续更新excel,ppt,爬虫,人工智能等相关内容,敬请关注
2.3获取文字块run
格式相同的一段连续文字构成一个文字块(run);上述句子共有7个文字块(run)
from docx import Document
# Open the existing Word file.
doc = Document("0.docx")
# Select the second paragraph (index 1) of the document.
paragraph = doc.paragraphs[1]
# paragraph.runs is a list with one Run instance per text block.
runs = paragraph.runs
print(runs)
[<docx.text.run.Run object at 0x000001F88E2F2E10>, <docx.text.run.Run object at 0x000001F88E2F2C88>, <docx.text.run.Run object at 0x000001F88E2F2E80>, <docx.text.run.Run object at 0x000001F88E2F2DD8>, <docx.text.run.Run object at 0x000001F88E2F2EB8>, <docx.text.run.Run object at 0x000001F88E2F2F28>, <docx.text.run.Run object at 0x000001F88E2F2F60>]
paragraph.runs 获取一个列表,得到每个文字块的实例
2.4提取文字块的内容
from docx import Document

doc = Document("0.docx")
# Work on the second paragraph (index 1) of the document.
paragraph = doc.paragraphs[1]
runs = paragraph.runs
print(runs)
# Print the text of each run on its own line.
for run in runs:
    print(run.text)
excel与python结合的第二部分内容,
后续将会持续更新excel
,
ppt
,
爬虫
,人工智能
三、写入word文件
1.添加文字内容
1.1添加标题
方法:doc.add_heading("标题内容",level=标题等级)
from docx import Document
# Document() with no path starts a new document instead of opening one.
doc = Document()
# Add a heading with the given text at heading level 1.
doc.add_heading("添加一个一级标题",level=1)
1.2添加段落
方法:doc.add_paragraph("段落文字内容")
from docx import Document
# Start a new document and give it a level-1 heading.
doc = Document()
doc.add_heading("添加一个一级标题",level=1)
# Each add_paragraph call appends one paragraph containing the given text.
paragraph1 = doc.add_paragraph("添加段落1")
paragraph2 = doc.add_paragraph("添加段落2")
1.3添加文字块
方法:paragraph.add_run("文字内容"),返回一个文字块;可继续设置其属性,如 .bold = True(粗体)、.italic = True(斜体)
from docx import Document
# Start a new document with a heading and two text paragraphs.
doc = Document()
doc.add_heading("添加一个一级标题",level=1)
paragraph1 = doc.add_paragraph("添加段落1")
paragraph2 = doc.add_paragraph("添加段落2")
# An empty paragraph that is filled run by run, so that each run can
# carry its own formatting.
paragraph3 = doc.add_paragraph()
# add_run returns the new run; set bold on it directly.
paragraph3.add_run("粗体").bold = True
# A run added without further settings.
paragraph3.add_run('正常')
# Same pattern as above, for italic.
paragraph3.add_run('斜体').italic = True
1.4添加分页
方法:doc.add_page_break()
from docx import Document
doc = Document() #create a new file
doc.add_heading("添加一个一级标题",level=1) #heading
paragraph1 = doc.add_paragraph("添加段落1") #paragraph
paragraph2 = doc.add_paragraph("添加段落2")
paragraph3 = doc.add_paragraph()
paragraph3.add_run("粗体").bold = True #text block (run)
paragraph3.add_run('正常')
paragraph3.add_run('斜体').italic = True
# Insert a page break after the content added so far.
doc.add_page_break()
2.添加图片和表格
添加图片:
方法:
doc.add_picture("图片地址",width=Cm(设置的宽度))
doc.add_picture("图片地址",height=Cm(设置的高度))
只需要给定一个高度或者宽度,另一个尺寸会根据比例自动计算
from docx import Document
from docx.shared import Cm
doc = Document() #create a new file
doc.add_heading("添加一个一级标题",level=1) #heading
paragraph1 = doc.add_paragraph("添加段落1") #paragraph
paragraph2 = doc.add_paragraph("添加段落2")
paragraph3 = doc.add_paragraph()
paragraph3.add_run("粗体").bold = True #text block (run)
paragraph3.add_run('正常')
paragraph3.add_run('斜体').italic = True
doc.add_page_break() #add a page break
# Insert the same image twice: once with only the width given (2 cm) and
# once with only the height given (3 cm).
doc.add_picture("00.png",width=Cm(2))
doc.add_picture("00.png",height=Cm(3))
添加表格:
方法:doc.add_table(rows=行数,cols=列数)
from docx import Document
from docx.shared import Cm

doc = Document()  # create a new file
doc.add_heading("添加一个一级标题", level=1)  # heading
paragraph1 = doc.add_paragraph("添加段落1")  # paragraph
paragraph2 = doc.add_paragraph("添加段落2")
paragraph3 = doc.add_paragraph()
paragraph3.add_run("粗体").bold = True  # text block (run)
paragraph3.add_run('正常')
paragraph3.add_run('斜体').italic = True
doc.add_page_break()  # add a page break
doc.add_picture("00.png", width=Cm(2))
doc.add_picture("00.png", height=Cm(3))

# Table content: the first entry is the header row, the rest are records.
tabs = [
    ["姓名", '学号', "成绩"],
    ['李华', 101, 93],
    ['小明', 102, 94],
    ['小丽', 103, 98],
    ['小张', 104, 100],
]
# Size the table from the data itself. A hard-coded rows=4 would drop the
# last of the five entries in tabs.
table = doc.add_table(rows=len(tabs), cols=len(tabs[0]))
for table_row, record in zip(table.rows, tabs):
    for cell, value in zip(table_row.cells, record):
        # The data mixes strings and integers; convert before assigning.
        cell.text = str(value)
3.保存
doc.save(文件路径)
完整代码:
from docx import Document
from docx.shared import Cm

doc = Document()  # create a new file
doc.add_heading("添加一个一级标题", level=1)  # heading
paragraph1 = doc.add_paragraph("添加段落1")  # paragraph
paragraph2 = doc.add_paragraph("添加段落2")
paragraph3 = doc.add_paragraph()
paragraph3.add_run("粗体").bold = True  # text block (run)
paragraph3.add_run('正常')
paragraph3.add_run('斜体').italic = True
doc.add_page_break()  # add a page break
doc.add_picture("00.png", width=Cm(2))
doc.add_picture("00.png", height=Cm(3))

# Table content: the first entry is the header row, the rest are records.
tabs = [
    ["姓名", '学号', "成绩"],
    ['李华', 101, 93],
    ['小明', 102, 94],
    ['小丽', 103, 98],
    ['小张', 104, 100],
]
# Size the table from the data itself. A hard-coded rows=4 would drop the
# last of the five entries in tabs.
table = doc.add_table(rows=len(tabs), cols=len(tabs[0]))
for table_row, record in zip(table.rows, tabs):
    for cell, value in zip(table_row.cells, record):
        # The data mixes strings and integers; convert before assigning.
        cell.text = str(value)

# Write the finished document to disk.
doc.save('10.docx')
结果:
四、调整Word文档样式
1.修改文字样式
from docx import Document
from docx.shared import Pt, RGBColor
from docx.oxml.ns import qn

doc = Document("0.docx")
# Apply the same character formatting to every run of every paragraph.
for paragraph in doc.paragraphs:
    for run in paragraph.runs:
        run.font.bold = True  # bold
        run.font.italic = True  # italic
        run.font.underline = True  # underline
        run.font.strike = True  # strikethrough
        run.font.shadow = True  # shadow
        run.font.size = Pt(15)  # font size in points
        run.font.color.rgb = RGBColor(255, 255, 0)  # font colour
        run.font.name = "微软雅黑"  # font name
        # Also set the East Asian font attribute on the run's rFonts
        # element so that Chinese text uses the same typeface.
        r = run._element.rPr.rFonts
        r.set(qn('w:eastAsia'), "微软雅黑")
doc.save("xin0.docx")
2.修改段落格式
2.1对齐样式:
from docx.enum.text import WD_ALIGN_PARAGRAPH
# Center the paragraph; `paragraph` is a Paragraph obtained elsewhere,
# e.g. while iterating doc.paragraphs.
paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
也可选择其他不同对齐
如:LEFT,RIGHT等
2.2行间距
# A float value sets the line spacing as a multiple of single spacing
# (2.0 = double spacing). The attribute is spelled paragraph_format.
paragraph.paragraph_format.line_spacing = 2.0
用浮点数,表示两倍行间距
2.3段前和段后间距
# Spacing before and after the paragraph, given in points.
spacing = paragraph.paragraph_format
spacing.space_before = Pt(12)
spacing.space_after = Pt(21)
Pt(21)表示21磅(Pt需从docx.shared导入)
3.整体代码
from docx import Document
from docx.shared import Pt, RGBColor
from docx.oxml.ns import qn
from docx.enum.text import WD_ALIGN_PARAGRAPH

doc = Document("0.docx")
for paragraph in doc.paragraphs:
    # Character formatting, applied to every run of the paragraph.
    for run in paragraph.runs:
        run.font.bold = True  # bold
        run.font.italic = True  # italic
        run.font.underline = True  # underline
        run.font.strike = True  # strikethrough
        run.font.shadow = True  # shadow
        run.font.size = Pt(15)  # font size in points
        run.font.color.rgb = RGBColor(255, 255, 0)  # font colour
        run.font.name = "微软雅黑"  # font name
        # Also set the East Asian font attribute on the run's rFonts
        # element so that Chinese text uses the same typeface.
        r = run._element.rPr.rFonts
        r.set(qn('w:eastAsia'), "微软雅黑")
    # Paragraph formatting, applied once per paragraph.
    paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER  # centered alignment
    paragraph.paragraph_format.line_spacing = 2.0  # float = multiple of single spacing
    paragraph.paragraph_format.space_before = Pt(12)  # 12 pt before the paragraph
    paragraph.paragraph_format.space_after = Pt(21)  # 21 pt after the paragraph
doc.save("xin1.docx")
结果: