Python自动化 | 利用python-docx 实现word操作

一、安装python-docx模块

pip install python-docx

二、读取word文件

1.word文档结构

Document: 文档
Paragraph:段落
Run:文字块

2.提取文字
2.1提取段落实例,段数:

.paragraphs  获取一个列表,包含每个段落的实例
from docx import Document

# Load the document, then show its list of Paragraph objects and how many there are.
doc = Document("0.docx")
paragraphs = doc.paragraphs
print(paragraphs)
print(len(paragraphs))

结果:

[<docx.text.paragraph.Paragraph object at 0x000001F88E2F2E80>, <docx.text.paragraph.Paragraph object at 0x000001F88E2F2C88>, <docx.text.paragraph.Paragraph object at 0x000001F88E2F2EF0>]
3

结果说明有三段
2.2提取段落内容

from docx import Document

# Print the plain text of every paragraph, one paragraph per line.
doc = Document("0.docx")
for para in doc.paragraphs:
    print(para.text)
以上便是word与python结合的第二部分内容,后续将会持续更新excel,ppt,爬虫,人工智能等相关内容,敬请关注

 

2.3获取文字块run

每一段格式相同的连续文字构成一个文字块(run),上述句子共有7个文字块(run)

from docx import Document

# Show the Run objects that make up the second paragraph of the document.
doc = Document("0.docx")
second_paragraph = doc.paragraphs[1]
print(second_paragraph.runs)
[<docx.text.run.Run object at 0x000001F88E2F2E10>, <docx.text.run.Run object at 0x000001F88E2F2C88>, <docx.text.run.Run object at 0x000001F88E2F2E80>, <docx.text.run.Run object at 0x000001F88E2F2DD8>, <docx.text.run.Run object at 0x000001F88E2F2EB8>, <docx.text.run.Run object at 0x000001F88E2F2F28>, <docx.text.run.Run object at 0x000001F88E2F2F60>]

paragraph.runs 获取一个列表,得到每个文字块的实例

2.4提取文字块的内容

from docx import Document

# Show the second paragraph's Run objects, then the text held by each one.
doc = Document("0.docx")
second_paragraph_runs = doc.paragraphs[1].runs
print(second_paragraph_runs)
for text_run in second_paragraph_runs:
    print(text_run.text)
excel与python结合的第二部分内容,
后续将会持续更新excel
,
ppt
,
爬虫
,人工智能

三、写入word文件

1.添加文字内容
1.1添加标题

方法:doc.add_heading("标题内容",level=标题等级)
from docx import Document

# Create a new document and add a level-1 heading to it.
doc = Document()
doc.add_heading(text="添加一个一级标题", level=1)

1.2添加段落

方法:doc.add_paragraph("段落文字内容")
from docx import Document

doc = Document()
doc.add_heading(text="添加一个一级标题", level=1)
# Append two body paragraphs after the heading.
paragraph1 = doc.add_paragraph(text="添加段落1")
paragraph2 = doc.add_paragraph(text="添加段落2")

1.3添加文字块

方法:paragraph.add_run("文字内容") 返回一个文字块,可继续设置其属性,例如 .bold = True、.italic = True
from docx import Document

doc = Document()
doc.add_heading(text="添加一个一级标题", level=1)
paragraph1 = doc.add_paragraph(text="添加段落1")
paragraph2 = doc.add_paragraph(text="添加段落2")

# Build the third paragraph from three runs, each with its own character formatting.
paragraph3 = doc.add_paragraph()
bold_run = paragraph3.add_run("粗体")
bold_run.bold = True
paragraph3.add_run("正常")
italic_run = paragraph3.add_run("斜体")
italic_run.italic = True

1.4添加分页

方法:doc.add_page_break()
from docx import Document

# Create a new document with a heading and two plain paragraphs.
doc = Document()
doc.add_heading(text="添加一个一级标题", level=1)
paragraph1 = doc.add_paragraph(text="添加段落1")
paragraph2 = doc.add_paragraph(text="添加段落2")

# Third paragraph: a bold run, a plain run and an italic run.
paragraph3 = doc.add_paragraph()
bold_run = paragraph3.add_run("粗体")
bold_run.bold = True
paragraph3.add_run("正常")
italic_run = paragraph3.add_run("斜体")
italic_run.italic = True

# Add a page break after the paragraphs.
doc.add_page_break()

2.添加图片和表格
添加图片:

方法:
doc.add_picture("图片地址",width=Cm(设置的宽度))
doc.add_picture("图片地址",height=Cm(设置的高度))
只需要给定一个高度或者宽度,另一个尺寸会根据比例自动计算
from docx import Document
from docx.shared import Cm

# Create a new document with a heading and two plain paragraphs.
doc = Document()
doc.add_heading(text="添加一个一级标题", level=1)
paragraph1 = doc.add_paragraph(text="添加段落1")
paragraph2 = doc.add_paragraph(text="添加段落2")

# Third paragraph: a bold run, a plain run and an italic run.
paragraph3 = doc.add_paragraph()
bold_run = paragraph3.add_run("粗体")
bold_run.bold = True
paragraph3.add_run("正常")
italic_run = paragraph3.add_run("斜体")
italic_run.italic = True

# Add a page break, then insert the same image twice: once sized by width,
# once sized by height.
doc.add_page_break()
picture_path = "00.png"
doc.add_picture(picture_path, width=Cm(2))
doc.add_picture(picture_path, height=Cm(3))

添加表格:

方法:doc.add_table(rows=行数,cols=列数)  
from docx import Document
from docx.shared import Cm

# Create a new document with a heading and two plain paragraphs.
doc = Document()
doc.add_heading("添加一个一级标题", level=1)
paragraph1 = doc.add_paragraph("添加段落1")
paragraph2 = doc.add_paragraph("添加段落2")

# Third paragraph: a bold run, a plain run and an italic run.
paragraph3 = doc.add_paragraph()
paragraph3.add_run("粗体").bold = True
paragraph3.add_run('正常')
paragraph3.add_run('斜体').italic = True

# Add a page break, then insert the image sized by width and again by height.
doc.add_page_break()
doc.add_picture("00.png", width=Cm(2))
doc.add_picture("00.png", height=Cm(3))

# Table data: one header row followed by the records.
tabs = [
    ["姓名", '学号', "成绩"],
    ['李华', 101, 93],
    ['小明', 102, 94],
    ['小丽', 103, 98],
    ['小张', 104, 100],
]

# Size the table from the data itself so that no record is dropped
# (a hard-coded rows=4 left out the last of the five rows).
table = doc.add_table(rows=len(tabs), cols=len(tabs[0]))
for table_row, record in zip(table.rows, tabs):
    for cell, value in zip(table_row.cells, record):
        # Cell text must be a string; the student number and score are ints.
        cell.text = str(value)

3.保存

doc.save(文件路径)

完整代码:

from docx import Document
from docx.shared import Cm
# Create a new document with a heading and two plain paragraphs.
doc = Document()
doc.add_heading("添加一个一级标题", level=1)
paragraph1 = doc.add_paragraph("添加段落1")
paragraph2 = doc.add_paragraph("添加段落2")

# Third paragraph: a bold run, a plain run and an italic run.
paragraph3 = doc.add_paragraph()
paragraph3.add_run("粗体").bold = True
paragraph3.add_run('正常')
paragraph3.add_run('斜体').italic = True

# Add a page break, then insert the image sized by width and again by height.
doc.add_page_break()
doc.add_picture("00.png", width=Cm(2))
doc.add_picture("00.png", height=Cm(3))

# Table data: one header row followed by the records.
tabs = [
    ["姓名", '学号', "成绩"],
    ['李华', 101, 93],
    ['小明', 102, 94],
    ['小丽', 103, 98],
    ['小张', 104, 100],
]

# Size the table from the data itself so that no record is dropped
# (a hard-coded rows=4 left out the last of the five rows).
table = doc.add_table(rows=len(tabs), cols=len(tabs[0]))
for table_row, record in zip(table.rows, tabs):
    for cell, value in zip(table_row.cells, record):
        # Cell text must be a string; the student number and score are ints.
        cell.text = str(value)

# Write the finished document to disk.
doc.save('10.docx')

结果:
在这里插入图片描述
在这里插入图片描述

 

四、调整Word文档样式

1.修改文字样式

from docx import Document
from docx.shared import Pt,RGBColor
from docx.oxml.ns import qn

# Apply the same character formatting to every run of every paragraph,
# then save the result under a new file name.
doc = Document("0.docx")
east_asia_attr = qn('w:eastAsia')
for para in doc.paragraphs:
    for run in para.runs:
        font = run.font
        font.bold = True                        # bold
        font.italic = True                      # italic
        font.underline = True                   # underline
        font.strike = True                      # strikethrough
        font.shadow = True                      # shadow
        font.size = Pt(15)                      # font size
        font.color.rgb = RGBColor(255, 255, 0)  # font colour
        font.name = "微软雅黑"                   # typeface
        # Also set the East Asian font attribute on the run's rFonts element
        # so the typeface is applied to Chinese text.
        run._element.rPr.rFonts.set(east_asia_attr, "微软雅黑")

doc.save("xin0.docx")

2.修改段落格式
2.1对齐样式:

from docx.enum.text import WD_ALIGN_PARAGRAPH


# Set the paragraph's alignment to centered.
paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

也可选择其他不同对齐
如:LEFT,RIGHT等
2.2行间距

# A float line_spacing is a multiple of the single line height; 2.0 means double spacing.
paragraph.paragraph_format.line_spacing = 2.0
用浮点数,表示两倍行间距

2.3段前和段后间距

# Spacing before and after the paragraph, given in points via Pt.
paragraph.paragraph_format.space_before = Pt(12)
paragraph.paragraph_format.space_after = Pt(21)
Pt(21)表示21磅

3.整体代码

from docx import Document
from docx.shared import Pt,RGBColor
from docx.oxml.ns import qn
from docx.enum.text import WD_ALIGN_PARAGRAPH



# Restyle every paragraph and every run of the document, then save a copy.
doc = Document("0.docx")
for paragraph in doc.paragraphs:
    # Paragraph-level formatting is set once per paragraph, outside the run
    # loop, so it is not repeated for each run and paragraphs without any
    # runs are formatted too.
    paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER  # centered
    paragraph_format = paragraph.paragraph_format
    paragraph_format.line_spacing = 2.0              # float: double line spacing
    paragraph_format.space_before = Pt(12)           # 12 pt before the paragraph
    paragraph_format.space_after = Pt(21)            # 21 pt after the paragraph

    # Character-level formatting is applied to each run.
    for run in paragraph.runs:
        run.font.bold = True                        # bold
        run.font.italic = True                      # italic
        run.font.underline = True                   # underline
        run.font.strike = True                      # strikethrough
        run.font.shadow = True                      # shadow
        run.font.size = Pt(15)                      # font size
        run.font.color.rgb = RGBColor(255, 255, 0)  # font colour
        run.font.name = "微软雅黑"                   # typeface
        # Also set the East Asian font attribute on the run's rFonts element
        # so the typeface is applied to Chinese text.
        r = run._element.rPr.rFonts
        r.set(qn('w:eastAsia'), "微软雅黑")

doc.save("xin1.docx")

结果:
在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值