anaconda+python+pycharm代码学习——自动化办公(三)——word自动化处理

Laney_Midory

已于 2022-08-16 15:44:11 修改

阅读量970

点赞数

分类专栏： python拓展学习——自动化办公文章标签：自动化 python docx

于 2022-08-13 17:49:30 首次发布

本文链接：https://blog.csdn.net/Laney_Midory/article/details/126318713

版权

python拓展学习——自动化办公专栏收录该内容

5 篇文章 0 订阅

订阅专栏

本文介绍了如何使用Python的docx库进行Word自动化处理，包括将文字写入Word、调整图片和表格、居中显示图片、创建表格、读取Word内容、制作模板、转换为PDF及批量处理。同时，提到了在处理PDF文件时遇到的字体问题。

摘要由CSDN通过智能技术生成

需要安装的库是 python-docx（pip install python-docx）
但是导入的时候包名是 docx，只需要下面的语句

from docx import Document

把文字写入 Word

在这里插入图片描述
代码如下

from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Pt  # 磅数
from docx.oxml.ns import qn  # 中文格式
# 以上是docx库中需要用到的部分

import time

def style_run(run, font_name, size_pt=None, bold=None):
    """Give *run* one typeface for both Latin and East Asian text.

    Args:
        run: The python-docx run to format.
        font_name: Typeface name applied to Latin and East Asian glyphs.
        size_pt: Font size in points; left untouched when ``None``.
        bold: Bold flag; left untouched when ``None``.
    """
    # NOTE(review): font.name is assigned first on purpose; the next line
    # edits the <w:rFonts> element and relies on it existing already.
    run.font.name = font_name
    # The East Asian slot is written by hand so Chinese text uses the font.
    run._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
    if size_pt is not None:
        run.font.size = Pt(size_pt)
    if bold is not None:
        run.font.bold = bold


price = input('请输入今日价格：')
company_list = ['客户1', '客户2', '客户3', '客户4', '客户5', '客户6', '客户7', '客户8', '客户9', '客户10']

# Today's date rendered as e.g. 2022年08月13日; the Chinese unit characters
# are substituted after strftime via the {y}/{m}/{d} placeholders.
today = time.strftime("%Y{y}%m{m}%d{d}", time.localtime()).format(y='年', m='月', d='日')

# One document per customer.
for company in company_list:
    document = Document()

    # Base font of the document (Latin and East Asian slots of Normal).
    document.styles['Normal'].font.name = u'宋体'
    document.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')

    # Title: centred, bold, 21 pt.
    p1 = document.add_paragraph()
    p1.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run1 = p1.add_run('关于下达%s产品价格的通知' % (today))
    style_run(run1, '微软雅黑', 21, True)
    # Spacing lives on paragraph_format; assigning p1.space_after directly
    # only creates an unused attribute on the Paragraph object.
    p1.paragraph_format.space_after = Pt(5)
    p1.paragraph_format.space_before = Pt(5)

    # Salutation addressed to the customer.
    p2 = document.add_paragraph()
    run2 = p2.add_run(company + '：')
    style_run(run2, '仿宋_GB2312', 16, True)

    # Body text carrying today's price.
    p3 = document.add_paragraph()
    run3 = p3.add_run('    根据公司安排，为提供优质客户服务，我单位拟定了今日价格为%s元，特此通知。' % price)
    style_run(run3, '仿宋_GB2312', 16, True)

    # Centred contact line.
    p4 = document.add_paragraph()
    p4.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run4 = p4.add_run('（联系人：小杨    电话：18888888888）')
    style_run(run4, '仿宋_GB2312', 16, True)

    # Saved as "<customer>-价格通知.docx" in the working directory.
    document.save('%s-价格通知.docx' % company)

在这里插入图片描述

这里要写函数

python内置函数locals

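曾尝试用它动态创建变量，如p1 p2等，具体参考这篇博客
注意：在函数内部向 locals() 返回的字典写入并不保证真正创建局部变量（Python 3.13 起每次调用 locals() 都返回独立快照，写入后再读取会抛出 KeyError），因此更稳妥的做法是直接使用普通局部变量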
在这里插入图片描述
具体代码如下
真的是强强子啦！

from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Pt  # 磅数
from docx.oxml.ns import qn  # 中文格式

# Module-level document that add_context() appends to.
document = Document()
# Base typeface of the Normal style.
document.styles['Normal'].font.name = u'黑体'
# East Asian typeface of the Normal style, written straight into <w:rFonts>.
document.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'黑体')

def add_context(n, context):
    """Append *context* as a left-aligned 16 pt paragraph to ``document``.

    Args:
        n: Running number of the paragraph. Kept so existing calls keep
            working; it is no longer needed to name a variable.
        context: Content of the paragraph; converted with ``str()``.
    """
    # Plain local names replace the former locals()['p' + str(n)] lookups:
    # writes to the mapping returned by locals() are not real variables and
    # reading them back raises KeyError on Python 3.13+.
    paragraph = document.add_paragraph()
    paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
    run = paragraph.add_run(str(context))
    run.font.size = Pt(16)
    # Spacing must be set on paragraph_format; an attribute assigned on the
    # Paragraph object itself is ignored.
    paragraph.paragraph_format.space_after = Pt(5)
    paragraph.paragraph_format.space_before = Pt(5)

# Each entry becomes one paragraph; its 1-based position is handed to
# add_context as the first argument.
zen_lines = (
    'The Zen of Python, by Tim Peters',
    'Beautiful is better than ugly.',
    'Explicit is better than implicit.',
    'Simple is better than complex.',
    'Complex is better than complicated.',
    'Flat is better than nested.',
    'Sparse is better than dense.',
    'Readability counts.',
    "Special cases aren't special enough to break the rules.",
    'Although practicality beats purity.',
)
for number, line in enumerate(zen_lines, start=1):
    add_context(number, line)

document.save('C:/Users/Administrator/Desktop/excelcode/doc/作业.docx')

把图片和表格写入 Word

在这里插入图片描述
这里要增加一个库来修改图片尺寸
from docx.shared import Inches # 图片尺寸

增加图片的语句是 document.add_picture('banner.jpg', width=Inches(6))
增加表格的语句是 table = document.add_table(rows=3, cols=3, style='Table Grid')
还需要对表格进行格式设置
table.cell(0, 0).merge(table.cell(0, 2))#第一行单元格合并
#单元格添加文字并且可以设置字体
table_run1 = table.cell(0, 0).paragraphs[0].add_run('XX产品报价表')
#设置对齐方式
table.cell(0, 0).paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER
#设置字体
table_run1.font.name = u'隶书'
table_run1._element.rPr.rFonts.set(qn('w:eastAsia'), u'隶书')

如果使用默认字体和格式，则直接给单元格的 text 赋值
table.cell(1, 0).text = '日期'

#翻页，插入分页符
document.add_page_break()

总代码如下：

from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Pt  # 磅数
from docx.oxml.ns import qn  # 中文格式
from docx.shared import Inches  # 图片尺寸
# 以上是docx库中需要用到的部分

import time
def style_run(run, font_name, size_pt=None, bold=None):
    """Give *run* one typeface for both Latin and East Asian text.

    Args:
        run: The python-docx run to format.
        font_name: Typeface name applied to Latin and East Asian glyphs.
        size_pt: Font size in points; left untouched when ``None``.
        bold: Bold flag; left untouched when ``None``.
    """
    # NOTE(review): font.name is assigned first on purpose; the next line
    # edits the <w:rFonts> element and relies on it existing already.
    run.font.name = font_name
    run._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
    if size_pt is not None:
        run.font.size = Pt(size_pt)
    if bold is not None:
        run.font.bold = bold


# Today's date rendered as e.g. 2022年08月13日.
today = time.strftime("%Y{y}%m{m}%d{d}", time.localtime()).format(y='年', m='月', d='日')

price = input('请输入今日价格：')
company_list = ['客户1', '客户2', '客户3', '客户4', '客户5', '客户6', '客户7', '客户8', '客户9', '客户10']


for company in company_list:
    document = Document()

    # Base font of the document: 14 pt, same face for Latin and East Asian.
    normal_style = document.styles['Normal']
    normal_style.font.name = u'微软雅黑'
    normal_style.font.size = Pt(14)
    normal_style._element.rPr.rFonts.set(qn('w:eastAsia'), u'微软雅黑')

    # Letterhead picture at the very top, 6 inches wide.
    document.add_picture('banner.jpg', width=Inches(6))

    # Title: centred, bold, 21 pt.
    p1 = document.add_paragraph()
    p1.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run1 = p1.add_run('关于下达%s产品价格的通知' % (today))
    style_run(run1, '微软雅黑', 21, True)
    # Spacing lives on paragraph_format; assigning p1.space_after directly
    # only creates an unused attribute on the Paragraph object.
    p1.paragraph_format.space_after = Pt(5)
    p1.paragraph_format.space_before = Pt(5)

    # Salutation addressed to the customer.
    p2 = document.add_paragraph()
    run2 = p2.add_run(company + '：')
    style_run(run2, '仿宋_GB2312', 16, True)

    p3 = document.add_paragraph()
    run3 = p3.add_run('    根据公司安排，为提供优质客户服务，我单位现将价格通知如下。')
    style_run(run3, '仿宋_GB2312', 16, True)

    table = document.add_table(rows=3, cols=3, style='Table Grid')

    # The first row is merged into a single centred title cell.
    table.cell(0, 0).merge(table.cell(0, 2))
    title_paragraph = table.cell(0, 0).paragraphs[0]
    table_run1 = title_paragraph.add_run('XX产品报价表')
    title_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    style_run(table_run1, u'隶书')

    # Remaining cells use the default font via plain .text assignment.
    cell_rows = (
        ('日期', '价格', '备注'),
        (today, str(price), ''),
    )
    for row_index, values in enumerate(cell_rows, start=1):
        for col_index, value in enumerate(values):
            table.cell(row_index, col_index).text = value

    # Centred contact line. The East Asian font is set on this run only;
    # the earlier version wrote it onto the Normal style, which replaced
    # the document-wide default configured above.
    p4 = document.add_paragraph()
    p4.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run4 = p4.add_run('（联系人：小杨    电话：18888888888）')
    style_run(run4, '仿宋_GB2312', 16, True)

    # Page break, then the advert paragraph on the new page.
    document.add_page_break()
    p5 = document.add_paragraph()
    p5.add_run('此处是广告')

    # Saved as "<customer>-价格通知.docx" in the working directory.
    document.save('%s-价格通知.docx' % company)

但是这里有个问题
图片并不居中
那么该怎么使图片居中呢

图片居中

这里不通过document直接添加图片
而是先添加一个居中的段落
再往这个段落里面添加图片
具体代码如下

#让图片居中
    # Centre the picture by placing it in its own centred paragraph rather
    # than calling document.add_picture() directly.
    picture_paragraph = document.add_paragraph()
    picture_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    picture_run = picture_paragraph.add_run("")
    picture_run.add_picture(
        'C:/Users/Administrator/Desktop/excelcode/excel-useful/2.png',
        width=Inches(3),
    )

插入多种类型的表格

在这里插入图片描述

from docx import Document
from docx.enum.style import WD_STYLE_TYPE

# Writes one labelled 4x5 sample table for every table style the default
# template provides.
document = Document()
styles = document.styles

for style in styles:
    # Paragraph, character and other non-table styles are skipped.
    if style.type != WD_STYLE_TYPE.TABLE:
        continue
    document.add_paragraph("表格样式 :  " + style.name)
    table = document.add_table(4, 5, style=style)
    # Blank lines separate consecutive samples.
    document.add_paragraph("\n\n")

document.save('C:/Users/Administrator/Desktop/excelcode/doc/所有表格样式.docx')

从word中读取内容

要分为三种类型
一种是纯文本
一种是纯表格
还有两种都有的情况
代码如下

# -*- coding: utf-8 -*-
"""
Created on Fri Aug 13 11:12:40 2022

@author:Laney_Midory
csdn:Laney_Midory
"""

from docx import Document
'''
document=Document('C:/Users/Administrator/Desktop/excelcode/doc/作业.docx')
#文档里面只有文字——读取文字所有内容
all_para=document.paragraphs
for para in all_para:
    #print(para.text)
    for run in para.runs: #若一个段落里面有多个run则会割裂段落
        print(run.text)


#文档里面只有表格——读取word表格里面的文字
document=Document('C:/Users/Administrator/Desktop/excelcode/doc/表格.docx')
all_tables=document.tables

for table in all_tables:
    for row in table.rows:
        for cell in row.cells:
            print(cell.text)

'''
# For documents mixing text and tables, the .docx is opened as the zip
# archive it is and the text is read from word/document.xml directly.
import zipfile
import xml.etree.ElementTree as ET

# Qualified tag name of the <w:t> element that holds run text.
_W_T = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}t'


def extract_docx_text(docx_path):
    """Return the text of every ``<w:t>`` element, joined without separator.

    The XML is parsed instead of split on the literal ``<w:t>``, so elements
    carrying attributes (e.g. ``xml:space="preserve"``) are included and
    entities such as ``&amp;`` come back as plain characters.

    Args:
        docx_path: Path of the .docx file.

    Returns:
        The concatenated text in document order (paragraph and table text
        alike).

    Raises:
        KeyError: If the archive has no ``word/document.xml`` member.
        zipfile.BadZipFile: If the file is not a zip archive.
    """
    # The context manager closes the archive even if reading fails.
    with zipfile.ZipFile(docx_path) as word:
        xml_bytes = word.read("word/document.xml")
    root = ET.fromstring(xml_bytes)
    # An empty <w:t/> has text None; treat it as an empty string.
    return "".join(node.text or "" for node in root.iter(_W_T))


if __name__ == "__main__":
    text = extract_docx_text('C:/Users/Administrator/Desktop/excelcode/doc/文表.docx')
    print(text)

创造模板

简单文字的模板：

from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Pt  # 磅数
from docx.oxml.ns import qn  # 中文格式

# 以上是docx库中需要用到的部分
# Module-level document that add_context() appends to.
document = Document()

document.styles['Normal'].font.name = u'黑体'
# Base typeface of the document.
document.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'黑体')
# Required for Chinese text: without this line the font is not the one configured.

def add_context(context):
    """Append *context* as a left-aligned 16 pt paragraph to ``document``.

    Args:
        context: Content of the paragraph; converted with ``str()``.
    """
    paragraph = document.add_paragraph()
    # Explicit left alignment.
    paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
    run = paragraph.add_run(str(context))
    # Font size: 16 pt.
    run.font.size = Pt(16)
    # 5 pt after and before the paragraph. Spacing must be set on
    # paragraph_format; an attribute assigned on the Paragraph object
    # itself is ignored.
    paragraph.paragraph_format.space_after = Pt(5)
    paragraph.paragraph_format.space_before = Pt(5)

# The same value is substituted into every template line and into the
# output file name.
change = '哈哈哈'
line_templates = (
    '  xxxxxxxxxx%s',
    '  !!!!!!!!%s!!!!!!',
)
for line_template in line_templates:
    add_context(line_template % change)

output_path = 'C:/Users/Administrator/Desktop/excelcode/doc/hahaha-%s.docx' % change
document.save(output_path)

套用模板

复杂文字的模板：

from docx import Document
from docx.shared import Pt  # 磅数
from docx.oxml.ns import qn  # 中文格式

# 以上是docx库中需要用到的部分
# Open the existing file that serves as the template.
document = Document('C:/Users/Administrator/Desktop/excelcode/doc/文表.docx')
document.styles['Normal'].font.name = u'微软雅黑'
document.styles['Normal'].font.size = Pt(12)
# Base typeface and size of the document.
document.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'微软雅黑')
# East Asian typeface of the Normal style.

def change_text(old_text, new_text, doc=None):
    """Replace *old_text* with *new_text* in paragraphs and table cells.

    Text is only written back where a match exists. Assigning ``run.text``
    or ``cell.text`` replaces the existing content, so the earlier
    unconditional assignment reset every run and every table cell even when
    nothing matched.

    Args:
        old_text: Text to search for. An empty string is ignored.
        new_text: Replacement text.
        doc: Document to edit; defaults to the module-level ``document``.

    Note:
        A match split across several runs is not replaced in body
        paragraphs. In a table cell with no single-run match, the whole
        cell text is rewritten as before, which drops run formatting.
    """
    target = document if doc is None else doc
    if not old_text:
        return

    def _replace_in_runs(paragraphs):
        """Replace inside individual runs; return True if any run changed."""
        changed = False
        for para in paragraphs:
            for run in para.runs:
                if old_text in run.text:
                    run.text = run.text.replace(old_text, new_text)
                    changed = True
        return changed

    _replace_in_runs(target.paragraphs)

    for table in target.tables:
        for row in table.rows:
            for cell in row.cells:
                if _replace_in_runs(cell.paragraphs):
                    continue
                # Fallback for a match spanning runs. It is skipped after a
                # run-level hit so a new_text containing old_text is not
                # replaced a second time.
                if old_text in cell.text:
                    cell.text = cell.text.replace(old_text, new_text)

# Table text that is reassigned through cell.text takes the document's
# default font format, which is why the Normal style is configured before
# these replacements run.
replacement = 'U•ェ•*U'
for placeholder in ('Tim Peters', '价格'):
    change_text(placeholder, replacement)

document.save('C:/Users/Administrator/Desktop/excelcode/doc/文表2.docx')

word转pdf

这里得需要用到之前安装的win32com库

pip install pywin32

完整代码，内核比较复杂，直接应用即可

# https://www.lfd.uci.edu/~gohlke/pythonlibs


from win32com.client import Dispatch, constants, gencache

docx_path = 'C:/Users/Administrator/Desktop/excelcode/doc/客户1-价格通知2.docx'
pdf_path = 'C:/Users/Administrator/Desktop/excelcode/doc/客户1-价格通知2.pdf'

# Make the generated wrapper for the Office type library available.
# NOTE(review): presumably this is what populates the ``constants.wd*``
# names used below - confirm against the pywin32 docs.
gencache.EnsureModule('{00020905-0000-0000-C000-000000000046}', 0, 8, 4)

# Start Word.
wd = Dispatch("Word.Application")
try:
    # Open read-only; the source document is never modified.
    doc = wd.Documents.Open(docx_path, ReadOnly=1)
    try:
        doc.ExportAsFixedFormat(pdf_path, constants.wdExportFormatPDF, Item=constants.wdExportDocumentWithMarkup,
                                CreateBookmarks=constants.wdExportCreateHeadingBookmarks)
    finally:
        # Close the document even if the export fails.
        doc.Close(constants.wdDoNotSaveChanges)
finally:
    # Always shut Word down so a failed run leaves no WINWORD process behind.
    wd.Quit(constants.wdDoNotSaveChanges)

批量生成pdf并且删除掉docx文件

这个要把前面学到的结合起来

from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Pt  # 磅数
from docx.oxml.ns import qn  # 中文格式
from docx.shared import Inches  # 图片尺寸
# 以上是docx库中需要用到的部分
from win32com.client import Dispatch, constants, gencache
import datetime
import os
import time
def style_run(run, font_name, size_pt=None, bold=None):
    """Give *run* one typeface for both Latin and East Asian text.

    Args:
        run: The python-docx run to format.
        font_name: Typeface name applied to Latin and East Asian glyphs.
        size_pt: Font size in points; left untouched when ``None``.
        bold: Bold flag; left untouched when ``None``.
    """
    # NOTE(review): font.name is assigned first on purpose; the next line
    # edits the <w:rFonts> element and relies on it existing already.
    run.font.name = font_name
    run._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
    if size_pt is not None:
        run.font.size = Pt(size_pt)
    if bold is not None:
        run.font.bold = bold


def build_notice(company, price, today):
    """Build and return the price-notice document for one customer.

    Args:
        company: Customer name used in the salutation.
        price: Price shown in the table; converted with ``str()``.
        today: Already formatted date string.
    """
    document = Document()

    # Base font of the document: 16 pt, same face for Latin and East Asian.
    normal_style = document.styles['Normal']
    normal_style.font.name = u'微软雅黑'
    normal_style.font.size = Pt(16)
    normal_style._element.rPr.rFonts.set(qn('w:eastAsia'), u'微软雅黑')

    # Letterhead picture at the very top, 6 inches wide.
    document.add_picture('banner.jpg', width=Inches(6))

    # Title: centred, bold, 21 pt.
    p1 = document.add_paragraph()
    p1.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run1 = p1.add_run('关于下达%s产品价格的通知' % (today))
    style_run(run1, '微软雅黑', 21, True)
    # Spacing lives on paragraph_format; assigning p1.space_after directly
    # only creates an unused attribute on the Paragraph object.
    p1.paragraph_format.space_after = Pt(5)
    p1.paragraph_format.space_before = Pt(5)

    # Salutation addressed to the customer.
    p2 = document.add_paragraph()
    run2 = p2.add_run(company + '：')
    style_run(run2, '仿宋_GB2312', 16, True)

    p3 = document.add_paragraph()
    run3 = p3.add_run('    根据公司安排，为提供优质客户服务，我单位现将价格通知如下。')
    style_run(run3, '仿宋_GB2312', 16, True)

    table = document.add_table(rows=3, cols=3, style='Table Grid')

    # The first row is merged into a single centred title cell.
    table.cell(0, 0).merge(table.cell(0, 2))
    title_paragraph = table.cell(0, 0).paragraphs[0]
    table_run1 = title_paragraph.add_run('XX产品报价表')
    title_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    style_run(table_run1, u'隶书')

    cell_rows = (
        ('日期', '价格', '备注'),
        (today, str(price), ''),
    )
    for row_index, values in enumerate(cell_rows, start=1):
        for col_index, value in enumerate(values):
            table.cell(row_index, col_index).text = value

    # Centred contact line. The East Asian font is set on this run only;
    # the earlier version wrote it onto the Normal style, which replaced
    # the document-wide default configured above.
    p4 = document.add_paragraph()
    p4.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run4 = p4.add_run('（联系人：小杨    电话：18888888888）')
    style_run(run4, '仿宋_GB2312', 16, True)

    # Page break, then the advert paragraph on the new page.
    document.add_page_break()
    p5 = document.add_paragraph()
    p5.add_run('此处是广告')
    return document


price = input('请输入今日价格：')
company_list = ['客户1', '客户2', '客户3', '客户4', '客户5', '客户6', '客户7', '客户8', '客户9', '客户10']

# Today's date rendered as e.g. 2022年08月13日.
today = datetime.date.today().strftime('%Y{y}%m{m}%d{d}').format(y='年', m='月', d='日')

# Word is started once for the whole batch instead of once per customer.
gencache.EnsureModule('{00020905-0000-0000-C000-000000000046}', 0, 8, 4)
wd = Dispatch("Word.Application")
try:
    for company in company_list:
        docx_path = 'D:/%s-价格通知.docx' % company
        pdf_path = 'D:/%s-价格通知.pdf' % company

        # NOTE(review): the former exists()/remove() pair before save() was
        # dropped on the assumption that save() overwrites - confirm.
        build_notice(company, price, today).save(docx_path)

        word_doc = wd.Documents.Open(docx_path, ReadOnly=1)
        try:
            word_doc.ExportAsFixedFormat(pdf_path, constants.wdExportFormatPDF,
                                         Item=constants.wdExportDocumentWithMarkup,
                                         CreateBookmarks=constants.wdExportCreateHeadingBookmarks)
        finally:
            # Close the document even if the export fails.
            word_doc.Close(constants.wdDoNotSaveChanges)

        # The intermediate .docx is removed only after a successful export.
        # NOTE(review): the former fixed 5 s sleep was dropped, assuming the
        # export call blocks until the PDF is written and Close() releases
        # the .docx - confirm on a real Word installation.
        os.remove(docx_path)
finally:
    # Always shut Word down so a failed run leaves no WINWORD process behind.
    wd.Quit(constants.wdDoNotSaveChanges)

读取pdf文件

要用到一个库pdfminer

pip install pdfminer3k

噢，但是运行时报错
dfonts = list_value(spec['DescendantFonts']) KeyError: 'DescendantFonts'
说是没有字体
可能是你的系统缺少某些字体，pdf文件是你生成的么，如果是，那么嵌入下字体。如果不是的话，你只能安装缺失的字体了
黑人问号脸这个问题到目前还不知道怎么解决
但先把代码发上来
如果有知道解决办法的朋友请帮帮孩子

# -*- coding: utf-8 -*-
"""
Created on Fri Aug 13 11:12:40 2022

@author:Laney_Midory
csdn:Laney_Midory
"""
from io import StringIO
from pdfminer.pdfinterp import PDFResourceManager,process_pdf  #pdf资源管理器和进程
from pdfminer.converter import TextConverter   #文字转换
from pdfminer.layout import LAParams

# Extract the text of the PDF into ``content`` and print it.
# The ``with`` block closes the file and the buffer, and ``finally`` closes
# the converter, even when process_pdf raises.
with open('C:/Users/Administrator/Desktop/excelcode/doc/客户1-价格通知.pdf', 'rb') as pdf_file, \
        StringIO() as retstr:
    rsrcmgr = PDFResourceManager()
    laparams = LAParams()

    # The converter writes the extracted text into the in-memory buffer.
    device = TextConverter(rsrcmgr=rsrcmgr, outfp=retstr, laparams=laparams)
    try:
        process_pdf(rsrcmgr=rsrcmgr, device=device, fp=pdf_file)
    finally:
        device.close()
    # Read the buffer before the ``with`` block closes it.
    content = retstr.getvalue()
print(content)