安装包:pip install python-docx
Word文件处理:适用前提是存在批量、重复的工作;Word文档按段落逐段进行自定义处理,逻辑简单
小案例1 入门–批量通知客户黄金的价格(纯文字)
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Pt #磅数
from docx.oxml.ns import qn #中文格式
# 以上是docx库中需要用到的部分
import time
# Batch-generate one plain-text price-notice .docx per customer.

# Ask the operator for today's price; input() returns it as a string.
price = input('请输入今日价格')
company_list = ['客户1', "客户2", "客户3", "客户4", "客户5"]

# time.strftime() takes a time tuple and returns the local time as a readable
# string whose layout is decided by the format argument.  today1..today3 only
# demonstrate alternative layouts; the notice itself uses `today`.
today1 = time.strftime("%Y-%m-%d", time.localtime())
today2 = time.strftime("%Y/%m/%d", time.localtime())
today3 = time.strftime("year-%Y month-%m day-%d", time.localtime())
# The {y}/{m}/{d} placeholders pass through strftime untouched and are then
# filled with the Chinese date units by str.format().
today = time.strftime("%Y{y}%m{m}%d{d}", time.localtime()).format(y="年", m="月", d="日")
# print(today1, today2, today3)
# print(today)

for i in company_list:
    document = Document()
    # Base font of the document
    document.styles['Normal'].font.name = u"宋体"
    # The East Asian font has to be set explicitly on the style's rFonts
    # element, otherwise the font chosen above is not applied to Chinese text.
    document.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')

    # First paragraph: the title, centred (left-aligned when not set)
    p1 = document.add_paragraph()
    p1.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run1 = p1.add_run("关于下达%s产品的价格通知" % (today))
    run1.font.name = '微软雅黑'
    run1._element.rPr.rFonts.set(qn('w:eastAsia'), u'微软雅黑')
    run1.font.size = Pt(21)  # font size
    run1.font.bold = True  # bold
    # Paragraph spacing is a property of paragraph_format, not of the
    # Paragraph object itself: 5 pt after and 5 pt before the title.
    p1.paragraph_format.space_after = Pt(5)
    p1.paragraph_format.space_before = Pt(5)

    # Second paragraph: the salutation addressed to the customer
    p2 = document.add_paragraph()
    run2 = p2.add_run(i + ":")
    run2.font.name = '仿宋_GB2312'
    run2._element.rPr.rFonts.set(qn('w:eastAsia'), u'仿宋')
    run2.font.size = Pt(16)
    run2.font.bold = True

    # Third paragraph: the notice body carrying the price
    p3 = document.add_paragraph()
    run3 = p3.add_run(' 根据公司安排,为提供客户优质服务,我单位拟定今日黄金价格为%s元,特此通知' % price)
    run3.font.name = '仿宋_GB2312'
    run3._element.rPr.rFonts.set(qn('w:eastAsia'), u'仿宋')
    run3.font.size = Pt(16)
    run3.font.bold = True

    # Fourth paragraph: centred contact line.  font.name is assigned before
    # touching rFonts so that the run's rPr/rFonts elements exist.
    p4 = document.add_paragraph()
    p4.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run4 = p4.add_run('(联系人:小杨 电话:188888888)')
    run4.font.name = '仿宋_GB2312'
    run4._element.rPr.rFonts.set(qn('w:eastAsia'), u'仿宋')
    run4.font.size = Pt(16)
    run4.font.bold = True

    # One output file per customer, named after the customer
    document.save('%s-价格通知.docx' % i)
小案例2 入门–批量通知客户黄金的价格(加图片和表格)
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Pt #磅数
from docx.oxml.ns import qn #中文格式
from docx.shared import Inches #图片尺寸
# 以上是docx库中需要用到的部分
import time
# Batch-generate one price-notice .docx per customer, with a header picture,
# a small quotation table and a second page.

# Ask the operator for today's price; input() returns it as a string.
price = input('请输入今日价格')
company_list = ['客户1', "客户2", "客户3", "客户4", "客户5"]
# The {y}/{m}/{d} placeholders pass through strftime untouched and are then
# filled with the Chinese date units by str.format().
today = time.strftime("%Y{y}%m{m}%d{d}", time.localtime()).format(y="年", m="月", d="日")

for i in company_list:
    document = Document()
    # Base font of the document
    document.styles['Normal'].font.name = u"微软雅黑"
    # The East Asian font has to be set explicitly on the style's rFonts
    # element, otherwise the font chosen above is not applied to Chinese text.
    document.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'微软雅黑')

    # Picture at the very top of the file as the letterhead, 6 inches wide
    document.add_picture('test.jpg', width=Inches(6))

    # First paragraph: the title, centred (left-aligned when not set)
    p1 = document.add_paragraph()
    p1.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run1 = p1.add_run("关于下达%s产品的价格通知" % (today))
    run1.font.name = '微软雅黑'
    run1._element.rPr.rFonts.set(qn('w:eastAsia'), u'微软雅黑')
    run1.font.size = Pt(21)  # font size
    run1.font.bold = True  # bold
    # Paragraph spacing is a property of paragraph_format, not of the
    # Paragraph object itself: 5 pt after and 5 pt before the title.
    p1.paragraph_format.space_after = Pt(5)
    p1.paragraph_format.space_before = Pt(5)

    # Second paragraph: the salutation addressed to the customer
    p2 = document.add_paragraph()
    run2 = p2.add_run(i + ":")
    run2.font.name = '仿宋_GB2312'
    run2._element.rPr.rFonts.set(qn('w:eastAsia'), u'仿宋')
    run2.font.size = Pt(16)
    run2.font.bold = True

    # Third paragraph: the notice body carrying the price
    p3 = document.add_paragraph()
    run3 = p3.add_run(' 根据公司安排,为提供客户优质服务,我单位拟定今日黄金价格为%s元,特此通知' % price)
    run3.font.name = '仿宋_GB2312'
    run3._element.rPr.rFonts.set(qn('w:eastAsia'), u'仿宋_GB2312')
    run3.font.size = Pt(16)
    run3.font.bold = True

    # 3x3 quotation table; the whole first row is merged into one title cell
    table = document.add_table(rows=3, cols=3, style='Table Grid')
    table.cell(0, 0).merge(table.cell(0, 2))
    table_run1 = table.cell(0, 0).paragraphs[0].add_run("XX产品报价表")
    table_run1.font.name = u'隶书'
    table_run1._element.rPr.rFonts.set(qn('w:eastAsia'), u'隶书')
    table.cell(0, 0).paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER
    # Row 1 holds the column headers (cell indices are zero-based)
    table.cell(1, 0).text = '日期'
    table.cell(1, 1).text = '价格'
    table.cell(1, 2).text = '备注'
    # Row 2 holds today's values; the remark column is left empty
    table.cell(2, 0).text = today
    table.cell(2, 1).text = str(price)
    table.cell(2, 2).text = ''

    # Centred contact line below the table
    p4 = document.add_paragraph()
    p4.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run4 = p4.add_run('(联系人:小杨 电话:188888888)')
    run4.font.name = '仿宋_GB2312'
    run4._element.rPr.rFonts.set(qn('w:eastAsia'), u'仿宋_GB2312')
    run4.font.size = Pt(16)
    run4.font.bold = True

    # Start a new page and put the advertisement placeholder on it
    document.add_page_break()
    p5 = document.add_paragraph()
    p5.add_run("此处是广告")

    # One output file per customer, named after the customer
    document.save('%s-价格通知.docx' % i)
Python读取Word文档的数据(文字段落/表格)
- 读取word文档段落文字
from docx import Document
# Open the document and print the text of every paragraph, one per line.
document = Document("文字.docx")
all_paragraphs = document.paragraphs
# Blank paragraphs in the document are printed too (as empty lines).
paragraph_texts = (item.text for item in all_paragraphs)
for paragraph_text in paragraph_texts:
    print(paragraph_text)
# Iterating item.runs and printing run.text would split each paragraph into
# fragments, so it is normally not used; printing whole paragraphs is enough.
- 读取word文档表格数据
# Open the document and print the text of every table cell, walking the
# tables in document order, row by row and cell by cell (e.g. tables that were
# pasted into Word from Excel).
document = Document('文字.docx')
all_tables = document.tables
every_cell = (
    entry
    for grid in all_tables
    for line in grid.rows
    for entry in line.cells
)
for entry in every_cell:
    print(entry.text)
- 读取word文档段落文字+表格数据
import zipfile
# Extract all text (body paragraphs and table cells alike) by reading the
# main XML part straight out of the .docx zip container.
import xml.etree.ElementTree as ET

# Fully-qualified tag of a WordprocessingML text element (<w:t>)
W_TEXT_TAG = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}t"

# The context manager closes the archive even if reading fails.
with zipfile.ZipFile("文字段落+表格.docx") as word:
    document_xml = word.read("word/document.xml")
xml = document_xml.decode('utf-8')
# print(xml)

# Walk the parsed tree instead of splitting on the literal '<w:t>': text
# elements that carry attributes (e.g. <w:t xml:space="preserve">) are then
# included, and XML entities such as &amp; are decoded.
text_list = [node.text or "" for node in ET.fromstring(document_xml).iter(W_TEXT_TAG)]
# print(text_list)
text = "".join(text_list)
print(text)
- word格式套用之创造模板
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Pt
from docx.oxml.ns import qn
# Start a blank document and give its 'Normal' style the base font.
document = Document()
normal_style = document.styles['Normal']
normal_style.font.name = u"微软雅黑"
# The East Asian font has to be set explicitly on the style's rFonts element,
# otherwise the font chosen above is not applied to Chinese text.
normal_style._element.rPr.rFonts.set(qn('w:eastAsia'), u'微软雅黑')
def add_context(context):
    """Append *context* as a new paragraph to the module-level ``document``.

    The paragraph is left-aligned, its text is ``str(context)`` in a single
    16 pt run, and it gets 5 pt of spacing before and after.

    Args:
        context: The content to write; converted with ``str()``.
    """
    p = document.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.LEFT
    r = p.add_run(str(context))
    r.font.size = Pt(16)
    # Paragraph spacing is a property of paragraph_format, not of the
    # Paragraph object itself.
    p.paragraph_format.space_after = Pt(5)
    p.paragraph_format.space_before = Pt(5)
# Fill the template: `change` is the variable part substituted into the first
# line; each line becomes its own paragraph, then the document is saved.
change = '多少'
template_lines = (
    '春花秋月何时了,往事知%s,小楼昨夜又东风' % change,
    '故国不堪回首明月中,碉楼与其应有在',
)
for template_line in template_lines:
    add_context(template_line)
document.save('虞美人.docx')
如果后面有批量需求,可以在上面用for循环创建出来
- word文档模板的复制套用
from docx import Document
from docx.shared import Pt
from docx.oxml.ns import qn
# Open the existing document that serves as the template and set the base
# font (Latin and East Asian) and the base size on its 'Normal' style.
document = Document('文字段落+表格.docx')
base_style = document.styles['Normal']
base_style.font.name = u'微软雅黑'
base_style._element.rPr.rFonts.set(qn('w:eastAsia'), u'微软雅黑')
base_style.font.size = Pt(12)
def change_text(old_text, new_text):
    """Replace *old_text* with *new_text* in the module-level ``document``.

    Body paragraphs are processed run by run, so a match has to lie inside a
    single run.  Table cells are processed on their whole text.

    Only runs and cells that actually contain *old_text* are rewritten:
    assigning ``run.text`` / ``cell.text`` replaces the existing content of
    that run / cell, so untouched ones are left exactly as they are.

    Args:
        old_text: The text to search for.  An empty string is a no-op (it
            would otherwise insert *new_text* between every character).
        new_text: The replacement text.
    """
    if not old_text:
        return

    # Body paragraphs (blank paragraphs simply have no matching run)
    for paragraph in document.paragraphs:
        for run in paragraph.runs:
            if old_text in run.text:
                run.text = run.text.replace(old_text, new_text)

    # Table cells, e.g. tables pasted into Word from Excel
    for table in document.tables:
        for row in table.rows:
            for cell in row.cells:
                if old_text in cell.text:
                    cell.text = cell.text.replace(old_text, new_text)
# Apply each (search, replacement) pair in order, then save the result under
# a new file name so the template itself stays untouched.
replacement_pairs = (
    ('发生', '龙头'),
    ('扫描', '窥探'),
)
for search_for, replace_with in replacement_pairs:
    change_text(search_for, replace_with)
document.save('123.docx')
用程序把word转为pdf
需要安装一个库:pip install pywin32
from win32com.client import Dispatch, constants, gencache
# Convert a .docx file to PDF by driving Microsoft Word through COM.
docx_path = 'G:/客户1-价格通知.docx'
pdf_path = 'G:/测试.pdf'

# Make sure the generated wrapper for the given type library exists so that
# the `constants` used below can be resolved.
# NOTE(review): the GUID is presumably the Microsoft Word object library,
# version 8.4 - confirm against the installed Office version.
gencache.EnsureModule('{00020905-0000-0000-C000-000000000046}', 0, 8, 4)
wd = Dispatch('Word.Application')
try:
    # Open read-only; the source document is only exported, never modified
    doc = wd.Documents.Open(docx_path, ReadOnly=1)
    doc.ExportAsFixedFormat(
        pdf_path,
        constants.wdExportFormatPDF,
        Item=constants.wdExportDocumentWithMarkup,
        CreateBookmarks=constants.wdExportCreateHeadingBookmarks,
    )
finally:
    # Always shut Word down, even when opening or exporting fails, so that no
    # Word process started here is left running.
    wd.Quit(constants.wdDoNotSaveChanges)
识别与读取pdf中的文字
from io import StringIO
from pdfminer.pdfinterp import PDFResourceManager, process_pdf
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
# Extract the text of a PDF with pdfminer and print it.
# The `with` statement closes the PDF file and the in-memory buffer even when
# processing raises.
with open('G:/测试.pdf', 'rb') as pdf_file, StringIO() as retstr:
    rsrcmgr = PDFResourceManager()
    laparams = LAParams()
    # The converter writes the extracted text into the StringIO buffer
    device = TextConverter(rsrcmgr=rsrcmgr, outfp=retstr, laparams=laparams)
    try:
        process_pdf(rsrcmgr=rsrcmgr, device=device, fp=pdf_file)
    finally:
        device.close()
    # Read the buffer before the `with` block closes it
    content = retstr.getvalue()
print(content)