python办公自动化---word文档

怪人陈先生

已于 2022-02-17 10:11:10 修改

阅读量752

点赞数 1

分类专栏： python word 文章标签： python 开发语言后端

于 2020-07-13 19:02:16 首次发布

原文链接：www.baidu.com

版权

python 同时被 2 个专栏收录

14 篇文章 0 订阅

订阅专栏

word

1 篇文章 0 订阅

订阅专栏

安装包：pip install python-docx

在这里插入图片描述

word文件处理：要有批量重复的工作前提，word文档按每段来自定义处理，逻辑简单

小案例1 入门–批量通知客户黄金的价格（纯文字）

from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Pt #磅数
from docx.oxml.ns import qn #中文格式
#  以上是docx库中需要用到的部分
import time

def _add_bold_run(paragraph, text, font_name, east_asian_font, size_pt):
    """Append a bold run holding *text* to *paragraph* and return the run.

    font_name is assigned to run.font.name, east_asian_font is written to the
    run's w:eastAsia font attribute (set separately for the Chinese text), and
    size_pt is the font size in points.
    """
    run = paragraph.add_run(text)
    # Assign font.name before touching rPr.rFonts: a freshly added run is not
    # guaranteed to carry an rPr element yet.
    run.font.name = font_name
    run._element.rPr.rFonts.set(qn('w:eastAsia'), east_asian_font)
    run.font.size = Pt(size_pt)
    run.font.bold = True
    return run


# Price typed in by the operator; it is interpolated into the notice text below.
price = input('请输入今日价格')
company_list = ['客户1', "客户2", "客户3", "客户4", "客户5"]

# time.strftime() renders a time tuple as text according to the format string.
# today1..today3 only demonstrate alternative layouts; the notices use `today`.
today1 = time.strftime("%Y-%m-%d", time.localtime())
today2 = time.strftime("%Y/%m/%d", time.localtime())
today3 = time.strftime("year-%Y month-%m day-%d", time.localtime())

# The Chinese unit characters are filled in with str.format() after strftime()
# has run, so the strftime format string itself stays ASCII-only.
today = time.strftime("%Y{y}%m{m}%d{d}", time.localtime()).format(y="年", m="月", d="日")

# One notice document per customer.
for company in company_list:
    document = Document()
    # Base font of the Normal style. The East Asian font is set separately
    # because the plain font name alone does not take effect for Chinese text.
    document.styles['Normal'].font.name = u"宋体"
    document.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')

    # Title paragraph, centered (paragraphs are left-aligned otherwise).
    p1 = document.add_paragraph()
    p1.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run1 = _add_bold_run(p1, "关于下达%s产品的价格通知" % (today), '微软雅黑', u'微软雅黑', 21)
    # Spacing is a property of paragraph_format; assigning p1.space_after
    # directly only creates an unused Python attribute.
    p1.paragraph_format.space_after = Pt(5)
    p1.paragraph_format.space_before = Pt(5)

    # Salutation addressed to the customer.
    p2 = document.add_paragraph()
    run2 = _add_bold_run(p2, company + "：", '仿宋_GB2312', u'仿宋', 16)

    # Body text carrying today's price.
    p3 = document.add_paragraph()
    run3 = _add_bold_run(
        p3,
        '     根据公司安排，危提供客户优质服务，我单位拟定今日黄金价格为%s元，特此通知' % price,
        '仿宋_GB2312',
        u'仿宋',
        16,
    )

    # Centered contact line; formatted through its own run (run4), not run3.
    p4 = document.add_paragraph()
    p4.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run4 = _add_bold_run(p4, '（联系人：小杨      电话：188888888）', '仿宋_GB2312', u'仿宋', 16)

    document.save('%s-价格通知.docx' % company)

小案例2 入门–批量通知客户黄金的价格（加图片和表格）

from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Pt #磅数
from docx.oxml.ns import qn #中文格式
from docx.shared import Inches  #图片尺寸
#  以上是docx库中需要用到的部分

import time

def _add_bold_run(paragraph, text, font_name, east_asian_font, size_pt):
    """Append a bold run holding *text* to *paragraph* and return the run.

    font_name is assigned to run.font.name, east_asian_font is written to the
    run's w:eastAsia font attribute (set separately for the Chinese text), and
    size_pt is the font size in points.
    """
    run = paragraph.add_run(text)
    # Assign font.name before touching rPr.rFonts: a freshly added run is not
    # guaranteed to carry an rPr element yet.
    run.font.name = font_name
    run._element.rPr.rFonts.set(qn('w:eastAsia'), east_asian_font)
    run.font.size = Pt(size_pt)
    run.font.bold = True
    return run


# Price typed in by the operator; it is interpolated into the notice and the table.
price = input('请输入今日价格')
company_list = ['客户1', "客户2", "客户3", "客户4", "客户5"]

# The Chinese unit characters are filled in with str.format() after strftime()
# has run, so the strftime format string itself stays ASCII-only.
today = time.strftime("%Y{y}%m{m}%d{d}", time.localtime()).format(y="年", m="月", d="日")

# One notice document per customer.
for company in company_list:
    document = Document()
    # Base font of the Normal style. The East Asian font is set separately
    # because the plain font name alone does not take effect for Chinese text.
    document.styles['Normal'].font.name = u"微软雅黑"
    document.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'微软雅黑')

    # Letterhead picture at the very top of the file, 6 inches wide.
    document.add_picture('test.jpg', width=Inches(6))

    # Title paragraph, centered (paragraphs are left-aligned otherwise).
    p1 = document.add_paragraph()
    p1.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run1 = _add_bold_run(p1, "关于下达%s产品的价格通知" % (today), '微软雅黑', u'微软雅黑', 21)
    # Spacing is a property of paragraph_format; assigning p1.space_after
    # directly only creates an unused Python attribute.
    p1.paragraph_format.space_after = Pt(5)
    p1.paragraph_format.space_before = Pt(5)

    # Salutation addressed to the customer.
    p2 = document.add_paragraph()
    run2 = _add_bold_run(p2, company + "：", '仿宋_GB2312', u'仿宋', 16)

    # Body text carrying today's price.
    p3 = document.add_paragraph()
    run3 = _add_bold_run(
        p3,
        '     根据公司安排，危提供客户优质服务，我单位拟定今日黄金价格为%s元，特此通知' % price,
        '仿宋_GB2312',
        u'仿宋_GB2312',
        16,
    )

    # 3x3 quotation table; the first row is merged into one centered caption cell.
    table = document.add_table(rows=3, cols=3, style='Table Grid')
    table.cell(0, 0).merge(table.cell(0, 2))
    caption_paragraph = table.cell(0, 0).paragraphs[0]
    table_run1 = caption_paragraph.add_run("XX产品报价表")
    table_run1.font.name = u'隶书'
    table_run1._element.rPr.rFonts.set(qn('w:eastAsia'), u'隶书')
    caption_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Row index 1 holds the column headers, row index 2 the values.
    table.cell(1, 0).text = '日期'
    table.cell(1, 1).text = '价格'
    table.cell(1, 2).text = '备注'
    table.cell(2, 0).text = today
    table.cell(2, 1).text = str(price)
    table.cell(2, 2).text = ''

    # Centered contact line.
    p4 = document.add_paragraph()
    p4.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run4 = _add_bold_run(p4, '（联系人：小杨      电话：188888888）', '仿宋_GB2312', u'仿宋_GB2312', 16)

    # Second page with the advertisement placeholder.
    document.add_page_break()

    p5 = document.add_paragraph()
    run5 = p5.add_run("此处是广告")

    document.save('%s-价格通知.docx' % company)

python读取word文档的数据（文字段落/ 表格）

读取word文档段落文字

from docx import Document

# Open the document and print the text of every paragraph in document order;
# blank paragraphs between the text are printed too (as empty lines).
document = Document("文字.docx")
all_paragraphs = document.paragraphs

# Paragraph-level text is used on purpose: iterating paragraph.runs and
# printing run.text would split each paragraph into fragments.
paragraph_texts = [paragraph.text for paragraph in all_paragraphs]
for paragraph_text in paragraph_texts:
    print(paragraph_text)

读取word文档表格数据

# Print the text of every table cell (for example tables pasted from Excel),
# walking the tables row by row and each row cell by cell.
document = Document('文字.docx')
all_tables = document.tables

cell_texts = (
    cell.text
    for table in all_tables
    for row in table.rows
    for cell in row.cells
)
for cell_text in cell_texts:
    print(cell_text)

读取word文档段落文字+表格数据

import zipfile
import xml.etree.ElementTree as ET

# Namespace URI bound to the "w:" prefix in word/document.xml.
_W_NAMESPACE = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"


def extract_docx_text(path):
    """Return the text of paragraphs and tables of the .docx file at *path*.

    A .docx file is a zip archive; the body lives in ``word/document.xml`` and
    every piece of visible text sits in a ``w:t`` element. The pieces are
    concatenated in document order without any separator.

    The XML is parsed instead of being split on the literal ``<w:t>``, so text
    elements that carry attributes (``<w:t xml:space="preserve">``) are
    included and XML entities such as ``&amp;`` are decoded.

    Raises:
        KeyError: if the archive has no ``word/document.xml`` member.
        zipfile.BadZipFile: if *path* is not a zip archive.
    """
    # The context manager closes the archive even if reading fails.
    with zipfile.ZipFile(path) as archive:
        xml_bytes = archive.read("word/document.xml")

    root = ET.fromstring(xml_bytes)
    text_tag = "{%s}t" % _W_NAMESPACE
    # node.text is None for an empty <w:t/> element.
    return "".join(node.text or "" for node in root.iter(text_tag))


if __name__ == "__main__":
    text = extract_docx_text("文字段落+表格.docx")
    print(text)

word格式套用之创造模板

from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Pt
from docx.oxml.ns import qn

# New empty document whose Normal style uses 微软雅黑 for both the base font
# and the East Asian font; the latter is set explicitly because the plain
# font name alone does not take effect for Chinese text.
document = Document()
normal_style = document.styles['Normal']
normal_style.font.name = u"微软雅黑"
normal_style._element.rPr.rFonts.set(qn('w:eastAsia'), u'微软雅黑')

def add_context(context):
    """Append *context* as a new paragraph to the module-level ``document``.

    The paragraph is left-aligned, its text is ``str(context)`` in a single
    16 pt run, and it gets 5 pt of spacing before and after.
    """
    p = document.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.LEFT
    r = p.add_run(str(context))
    r.font.size = Pt(16)
    # Spacing is a property of paragraph_format; assigning p.space_after
    # directly only creates an unused Python attribute.
    spacing = p.paragraph_format
    spacing.space_after = Pt(5)
    spacing.space_before = Pt(5)

# Fill the template: the first verse has one variable part (`change`),
# the second is fixed text. Each verse becomes its own paragraph.
change = '多少'
verses = (
    '春花秋月何时了，往事知%s，小楼昨夜又东风' % change,
    '故国不堪回首明月中，碉楼与其应有在',
)
for verse in verses:
    add_context(verse)

document.save('虞美人.docx')

如果后面有批量需求，可以在上面用for循环创建出来

word文档模板的复制套用

from docx import Document
from docx.shared import Pt
from docx.oxml.ns import qn

# Open the existing document used as the template and set the Normal style
# to 微软雅黑 (base font and East Asian font) at 12 pt.
document = Document('文字段落+表格.docx')
normal_style = document.styles['Normal']
normal_font = normal_style.font
normal_font.name = u'微软雅黑'
normal_style._element.rPr.rFonts.set(qn('w:eastAsia'), u'微软雅黑')
normal_font.size = Pt(12)

def change_text(old_text, new_text, doc=None):
    """Replace *old_text* with *new_text* in paragraphs and table cells.

    Args:
        old_text: Substring to look for. An empty string is ignored.
        new_text: Text that replaces every occurrence of old_text.
        doc: Object exposing ``paragraphs`` and ``tables`` the way a
            python-docx document does. Defaults to the module-level
            ``document``.

    Paragraph text is replaced run by run, so an occurrence that is split
    across two runs of a paragraph is not found. Table cells are matched on
    the whole cell text.
    """
    if not old_text:
        # str.replace("", new_text) would insert new_text between every
        # character, which is never what a caller wants here.
        return
    if doc is None:
        doc = document

    for paragraph in doc.paragraphs:
        for run in paragraph.runs:
            # Only write back runs that actually contain a match.
            if old_text in run.text:
                run.text = run.text.replace(old_text, new_text)

    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                # Assigning cell.text rewrites the content of the cell, so
                # cells without a match are left completely untouched.
                if old_text in cell.text:
                    cell.text = cell.text.replace(old_text, new_text)

# Apply each (old, new) replacement to the loaded template in order,
# then write the result to a new file.
replacements = (
    ('发生', '龙头'),
    ('扫描', '窥探'),
)
for old_word, new_word in replacements:
    change_text(old_word, new_word)

document.save('123.docx')

用程序把word转为pdf

需要安装一个库：pip install pywin32

from win32com.client import Dispatch, constants, gencache

# Source document and target PDF.
docx_path = 'G:/客户1-价格通知.docx'
pdf_path = 'G:/测试.pdf'

# NOTE(review): presumably this loads the generated wrapper for the Microsoft
# Word type library so that the `constants.wd*` names used below resolve;
# confirm the GUID and version against the installed Office.
gencache.EnsureModule('{00020905-0000-0000-C000-000000000046}', 0, 8, 4)

wd = Dispatch('Word.Application')
try:
    doc = wd.Documents.Open(docx_path, ReadOnly=1)
    try:
        doc.ExportAsFixedFormat(
            pdf_path,
            constants.wdExportFormatPDF,
            Item=constants.wdExportDocumentWithMarkup,
            CreateBookmarks=constants.wdExportCreateHeadingBookmarks,
        )
    finally:
        # Close the document even if the export failed.
        doc.Close(constants.wdDoNotSaveChanges)
finally:
    # Always shut Word down; otherwise a failed open/export leaves the
    # application that Dispatch() started running.
    wd.Quit(constants.wdDoNotSaveChanges)

识别与读取pdf中的文字（下面代码用到的 process_pdf 来自旧版 pdfminer，例如：pip install pdfminer3k；pdfminer.six 中没有这个函数）

from io import StringIO
from pdfminer.pdfinterp import  PDFResourceManager, process_pdf
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams

# Extract the text of the PDF into an in-memory buffer and print it.
rsrcmgr = PDFResourceManager()
laparams = LAParams()

# The with-block closes the file and the buffer even if parsing raises.
with open('G:/测试.pdf', 'rb') as pdf_file, StringIO() as retstr:
    # The converter writes the extracted text into retstr.
    device = TextConverter(rsrcmgr=rsrcmgr, outfp=retstr, laparams=laparams)
    try:
        process_pdf(rsrcmgr=rsrcmgr, device=device, fp=pdf_file)
    finally:
        device.close()
    # Read the buffer before the with-block closes it.
    content = retstr.getvalue()

print(content)