Python Office 自动化操作学习笔记 openpyxl PyPDF3 python-docx win32com.client csv json

最新推荐文章于 2023-02-24 21:29:31 发布
__TRIX
最新推荐文章于 2023-02-24 21:29:31 发布
阅读量214
点赞数
分类专栏： PythonProcessProgram 文章标签： python json
本文链接：https://blog.csdn.net/Tonymot/article/details/120280988
版权
PythonProcessProgram 专栏收录该内容
33 篇文章 1 订阅
订阅专栏
# -*- coding: utf-8 -*-
# Version: Python 3.9.5
# Author: TRIX
# Date: 2021-09-08 17:03:29
# Use:office include: excel pdf word csv json
目录：
openpyxl
PyPDF3
python-docx
win32com.client
csv
json
#openpyxl excel表格处理
import openpyxl#需要安装 pip install openpyxl
from openpyxl import Workbook
xlsx=Workbook()#创建xlsx
xlsx=openpyxl.load_workbook(filename,data_only=False)#打开xlsx文件 类的实例化 data_only=True返回单元格的计算后的值 False将返回单元格的原字符串
xlsx.sheetnames#xlsx文件所有sheet名字组成的列表 只能引用 不能命名
xlsx.save(filename.xlsx)#程序结束时使用 只支持xlsx格式
xlsx.remove(sheet)#删除sheet 参数是sheet对象 如 xlsx['sheet_name'] 不是名字字符串
del xlsx['sheet_name']#删除sheet
copy_sheet=xlsx.copy_worksheet(source_sheet)#sheet副本 参数是同一个xlsx中的sheet对象
sheet=xlsx.create_sheet('sheet_name',index)#新建sheet 插到索引位 索引值从0开始 不填默认插到最后
sheet=xlsx.active#激活的sheet 相当于xlsx打开后默认显示的sheet
sheet=xlsx['sheet_name']#打开xlsx中对应名字的sheet
sheet.title#表格名字 只能引用 和更改 不能命名
sheet.append(list)#写入行 在现有数据的下一行从第一列开始向右填充值 列表的每个元素表示一个单元格值 每次调用只写入一行 写入多行需循环调用
for row in sheet.values:#sheet所有值数据 不包含值格式
	for value in row:pass
sheet.sheet_properties.tabColor='1072BA'#改变sheet标签颜色
sheet.max_row#最大行 只能引用 不能命名
sheet.max_column#最大列 只能引用 不能命名
sheet.row_dimensions[num_index].height=40#行高 类似列表切片 默认12.75
sheet.column_dimensions['letter_index'].width=30#列宽 类似列表切片 默认8.43
sheet.merge_cells('A1:C3')#合并一个矩形区域中的单元格
sheet.unmerge_cells('A1:C3')#拆分一个矩形区域中的单元格 只会保留左上角值在左上角单元格
sheet.freeze_panes='coordinate'#冻结行 列 滚动sheet时 冻结的行列不会被滚动
list(sheet.rows)#所有行 每行单元格共组成一个元组 所有元组组成一个生成器
list(sheet.columns)#所有列 每列单元格共组成一个元组 所有元组组成一个生成器
a1=sheet['A1']#单元格 'ColumnRow'
a1.value#单元格值 只能引用 不能命名 如果是日期格式 会自动转为datetime.datetime()类
a1.row#单元格行值 只能引用 不能命名
a1.column#单元格列值 只能引用 不能命名
a1.coordinate#单元格坐标 只能引用 不能命名
a1=sheet.cell(row=row_num,column=col_num,value=None)#把value写入excel 用数字来代替字母形式 数字从1开始 而不是0 value填充会覆盖单元格值 不写将返回单元格原有的值
from openpyxl.utils import get_column_letter, column_index_from_string
get_column_letter(int)#返回数字对应的列的字母
column_index_from_string('letter')#返回字母对应的列的数字
a1.value=value#填充值 必须赋给.value 直接写a1=value只会改变变量a1 不会写入单元格 可以输入excel公式 如 "=SUM(A1, B2)"
a1.value=datetime.datetime.now().strftime("%Y-%m-%d")#填充当前日期 需要先import datetime
from openpyxl.styles import Font, Border, Side, PatternFill, colors, Alignment#单元格格式
a1.font=Font(name='等线', size=24, italic=True, underline='single',color='FF0000', bold=True)#等线 24号 加粗 斜体 下划线 红色 默认字体11 underline取值为'single' 'double'等 不是布尔值 颜色用RGB十六进制字符串(新版openpyxl已移除colors.RED)
a1.alignment=Alignment(horizontal='center', vertical='center')#水平居中 垂直居中 vertical可选 'top' 'center' 'bottom' 不支持'right'
left, right, top, bottom=[Side(style='thin', color='000000')] * 4
a1.border=Border(left=left, right=right, top=top, bottom=bottom)#边框
cell_slice=sheet['coordinate_start':'coordinate_end']
column=sheet['column_letter']
column_slice=sheet['letter_start:letter_end']
row=sheet[row_num]
row_slice=sheet[num_start:num_end]
for row in sheet.iter_rows(min_row=row_num,min_col=col_num,max_row=row_num,max_col=col_num,values_only=False):#指定行 values_only 只有值 没有坐标
	for cell in row:pass
for col in sheet.iter_cols(min_row=row_num,min_col=col_num,max_row=row_num,max_col=col_num,values_only=False):pass#指定列 values_only 只有值 没有坐标
for row in sheet.rows:
	for cell in row:
		print(cell.value,cell.coordinate)
for column in sheet.columns:
	for cell in column:
		print(cell.value,cell.coordinate)
cols_list=list(zip(*rows_list))#矩阵置换 矩阵旋转 行转列 列转行 若某一单元格缺少数据 会被舍弃这一列/行
from openpyxl.drawing.image import Image#插入图像
sheet.add_image(Image('logo.png'), 'A1')#添加到工作表并锚定在单元格旁边

#pdf处理
import PyPDF3#pip install PyPDF3
#pypdf3\utils.py 里的 两个 'latin-1' 改为 'utf-8' 可能会出错
with open('file.pdf','rb') as f:#二进制读取pdf 文本提取 字符可能会异常 所以一般不读取PDF的每个字符
	pdf_reader=PyPDF3.PdfFileReader(f)#读取器 注意：读取器按需从f读取内容 下面对pdf_reader和page的操作都要在with块内(文件未关闭时)进行
pdf_reader.numPages#页数
pdf_reader.isEncrypted#返回是否加密
pdf_reader.decrypt('str')#用str解密 只解密了pdf_file file.pdf本身仍然是加密状态
pageCount=pdf_reader.getNumPages()#返回一共多少页
page=pdf_reader.getPage(index)#读取某页 从0开始
page.rotateClockwise(90)#顺时针旋转页面90°
page.mergePage(another_page)#将another_page叠加到page上 用于水印
page_text=page.extractText()#提取某页文本

pdf_writer=PyPDF3.PdfFileWriter()#写入器
pdf_writer.addBlankPage()#追加一页空白页
pdf_writer.insertBlankPage(index=0)#插入一页空白页 从0开始
pdf_writer.getNumPages()#返回一共多少页
pdf_writer.insertPage(pdf_reader.getPage(pagenum),index=0)#从已有的pdf复制一页插入到index页
pdf_writer.encrypt('str')#用str加密
for pagenum in range(pdf_reader.numPages):
	pdf_writer.addPage(pdf_reader.getPage(pagenum))#从已有的pdf复制每页到新建的pdf addPage只能在末尾添加页面
with open('newfile.pdf','wb') as f:
	pdf_writer.write(f)#向newfile.pdf二进制写入文本

#word 简单读写操作
import docx#pip install python-docx
#docx: document-paragraph-run
doc=docx.Document('file.docx')#打开docx 变量不要命名为docx 否则会覆盖docx模块 导致后面的docx.Document() docx.shared无法使用
new_doc=docx.Document()#新建docx
new_doc.add_picture('pic_file',width=docx.shared.Inches(float),height=docx.shared.Cm(float))#加图像 可选 width height 英寸 或 厘米 不填使用默认值
new_doc.add_heading(str,int)#加标题 0-9 0最大 9最小 0是Title样式 适用于顶部标题 1适用于分章标题
new_para=new_doc.add_paragraph(str,'Style')#加段落 设置段落样式 str为para里的第一个run
new_para.add_run(str)#加run
new_doc.save('file.docx')#保存docx
paragraphs_list=doc.paragraphs#段落列表
para1=doc.paragraphs[0]#第一段
para1.text#paragraph文本字符串
para1.style='Style'#设置段落样式

runs_list=para1.runs#paragraph run对象 每种格式的字符串为一个run 如 plain bold italic 为3个不同的run对象
run1=runs_list[0]#第一个run
run1.text=string#run的文本字符串
run1.style='StyleChar'#设置run字符串样式
run1.bold=True#加粗
run1.italic=True#斜体
run1.underline=True#下划线
run1.strike=True#删除线
run1.double_strike=True#双删除线
run1.all_caps=True#所有字母显示为大写
run1.small_caps=True#小型大写字母 小写字母显示为大写 且字号小2点
run1.shadow=True#阴影
run1.outline=True#轮廓线 不是实心
run1.rtl=True#右到左书写
run1.imprint=True#刻入页面
run1.emboss=True#突出页面
run1.add_break()#添加换行符\n
run1.add_break(docx.enum.text.WD_BREAK.PAGE)#添加换页符 WD_BREAK位于docx.enum.text

'Style':
Normal
BodyText
BodyText2-3
Caption
Heading1-9
IntenseQuote
List
List2-3
ListBullet
ListBullet2-3
ListContinue
ListContinue2-3
ListNumber
ListNumber2-3
ListParagraph
MacroText
NoSpacing
Quote
Subtitle
TOCHeading
Title

#word 操作
#更多操作：word-开发工具-visual basic(alt+f11)-对象浏览器(F2)-查看对象名称
#查找对象名称：https://docs.microsoft.com/zh-cn/dotnet/api/microsoft.office.interop.word?view=word-pia
from win32com.client import Dispatch#pip install pypiwin32
word=Dispatch('Word.Application')#打开word
word.Visible=True#显示word界面
word.WebOptions.Encoding='utf-8'#设置docx编码格式 防止其他模块读取乱码 比如pyPDF3
docx=word.Documents.Add()#新建docx
docx=word.Documents.Open('file.docx')#打开docx
docx.Save()#保存docx Save不接受文件名 保存到指定文件用SaveAs('file.docx')
docx.SaveAs('file.pdf',FileFormat=17,Encoding=65001)#保存为pdf utf-8编码
docx.SaveAs('file.html',FileFormat=8)#保存为html 8即wdFormatHTML 也可以 from win32com.client import constants as wc 后使用 wc.wdFormatHTML
docx.Close()#关闭docx
word.Quit()#退出word

pageFormat=docx.PageSetup#页面格式对象
pageFormat.TopMargin=64#上边距64磅
pageFormat.PaperSize=7#页面大小 A3 A4 分别为6 7

styles=docx.Styles#样式集合对象 每种类型都含样式
mainBody=styles(-1)#正文样式对象 标题 123 分别为 -2 -3 -4 页眉-32 其他百度
mainBody.Font.Name='微软雅黑'#正文字体
mainBody.Font.Size=16#正文字号
八号=5
七号=5.5
小六=6.5
六号=7.5
小五=9
五号=10.5
小四=12
四号=14
小三=15
三号=16
小二=18
二号=22
小一=24
一号=26
小初=36
初号=42
cursor=word.Selection#获得光标对象并进行选中
cursor.TypeText('str')#在docx中写入str 并将光标放在str后
cursor.Text='str'#在docx中写入str 并选中str
cursor.TypeParagraph()#另起一段
cursor.Copy()#复制当前选中文本
cursor.Paste()#粘贴文本
strSelected=cursor()#查看光标选中文本
cursor.Start=0#选择起始点 从0开始 光标在字符前 一个字符为一个索引
cursor.End=index#选择结束点
cursor.Delete(num)#删除指定数量的字符或单词 不填 等效 按一次Backspace删除
cursor.EscapeKey()#等效 按一次Esc取消
cursor.WholeStory()#等效 ctrl+a全选
cursor.MoveLeft()#光标向左移动
cursor.MoveRight(1,n)#向右移动n个字符 第一个参数表示移动单位
cursor.HomeKey()#等效 按一次Home移动到该行开头
cursor.StartOf()#将光标移动到字符串首位字符
cursor.EndKey()#等效 按一次End移动到该行结尾
cursor.EndOf()#将光标移动到字符串结尾字符
cursor.Range=Range#代表指定对象中包含的文档部分。

font=cursor.Font#获得字体对象
font.Name='微软雅黑'#字体名称
font.Size=int #字体大小

paraFormat=cursor.ParagraphFormat#获得段落格式对象
paraFormat.Alignment=0#对齐 左中右 分别为 012
paraFormat.LineSpacingRule=0#行距 单倍 1.5倍 双倍 分别为0 1 2
paraFormat.LeftIndent=20#左缩进值20磅

word.Selection.Find.Execute(oldstr, False, False, False, False, False, True, 1, False, newstr, 2)#用newstr替换oldstr
#参数说明
原字符
是否区分大小写
是否仅查找整个字符
是否使用通配符
是否查找读音相似的词 例如 to too
是否查找文字所有形式 如 sit sat sitting
是否 向前搜索
搜索模式 1为wrap
是否只查找字符格式
用来替换的字符
替换模式 2为全部替换
while cursor.Find.Execute(FindText='str'):#替换指定字符为指定字体和指定大小
	cursor.Font.Name = "宋体"
	cursor.Font.Size = 16

window=docx.windows(1)# 获得文档的第一个窗口
window.View.SeekView=4# 获得页脚视图
window.View.SeekView=1# 获得页眉视图
window.View.SeekView=0# 获得主体视图

#公文标准
from win32com.client import Dispatch #pip install pypiwin32
cmToPoint=28.35#1cm=28.35磅
word=Dispatch('Word.Application')
docx=word.Documents.Open('file.docx')

docx.PageSetup.TopMargin=3.3*cmToPoint# 上边距3.3厘米
docx.PageSetup.BottomMargin=3.3*cmToPoint# 下边距3.3厘米
docx.PageSetup.LeftMargin=2.8*cmToPoint# 左边距2.8厘米
docx.PageSetup.RightMargin=2.6*cmToPoint# 右边距2.6厘米
docx.PageSetup.PaperSize=7#页面大小 A3 A4 分别为6 7

# 设置字体为仿宋 16
docx.Styles(-1).Font.Name='仿宋'
docx.Styles(-1).Font.NameFarEast='仿宋'
docx.Styles(-1).Font.NameAscii='仿宋'
docx.Styles(-1).Font.NameOther='仿宋'
docx.Shapes.Range(["文本框 24"]).TextFrame.TextRange.Font.Name = "仿宋"
docx.Styles(-1).Font.Size=16

docx.PageSetup.LayoutMode=1# 指定行和字符网格
docx.PageSetup.CharsLine=28# 每行28个字
docx.PageSetup.LinesPage=22# 每页22行，会自动设置行间距

# 页码设置
docx.PageSetup.FooterDistance=2.8*cmToPoint# 页码距下边缘2.8厘米
docx.PageSetup.DifferentFirstPageHeaderFooter=0# 首页页码相同
docx.PageSetup.OddAndEvenPagesHeaderFooter=0# 页脚奇偶页相同

window=docx.windows(1)# 获得文档的第一个窗口
window.View.SeekView=4# 获得页脚视图
cursor=window.Selection# 获取窗口的选择对象
cursor.HeaderFooter.PageNumbers.StartingNumber=1# 设置起始页码
cursor.HeaderFooter.PageNumbers.NumberStyle=0# 设置页码样式为单纯的阿拉伯数字
cursor.WholeStory()# 扩选到整个部分（会选中整个页眉页脚）
cursor.Delete()#按下删除键，这两句是为了清除原来的页码
cursor.HeaderFooter.PageNumbers.Add(4)# 添加页面外侧页码
cursor.MoveLeft(1, 2)# 移动到页码左边，移动了两个字符距离
cursor.TypeText('— ')# 给页码左边加上一字线，注意不是减号
cursor.MoveRight()#移动到页码末尾，移动了一个字符距离# 默认参数是1（字符）
cursor.TypeText(' —')
cursor.WholeStory()# 扩选到整个页眉页脚部分，此处是必要的  否则s只是在输入一字线后的一个光标，没有选择区域
cursor.Font.Name='宋体'
cursor.Font.Size=14#页码字号为四号
cursor.paragraphformat.rightindent=21#页码向左缩进1字符（21磅）
cursor.paragraphformat.leftindent=21# 页码向右缩进1字符（21磅）
docx.Styles('页眉').ParagraphFormat.Borders(-3).LineStyle=0# 页眉无底边框横线

csv#comma-separated values 逗号分隔的值 简化的表格 纯文本
import csv
file=open('file.csv')
file_reader=csv.reader(file)#读取器
data_list=list(file_reader)#每行为一个列表 所有行组成一个列表 二维列表
file_reader.line_num#从1开始

out_file=open('file.csv','w')#每行之间有空行 一般不用
out_file=open('file.csv','w',newline='')#每行之间没有空行
file_writer=csv.writer(out_file)#写入器
file_writer.writerow(list)#写入行 如果元素包含分隔符 引号或换行符 会自动给字符串添加""
file_writer.writerows(list_2d)#写入多行 参数为二维列表 如果元素包含分隔符 引号或换行符 会自动给字符串添加""

keys_list=[key1,key2]
file_writer=csv.DictWriter(out_file,keys_list)#以字典键为识别元素 字典写入器
file_writer.writeheader()#写入字典键
file_writer.writerows(row_dicts_list)#字典 键对应keys_list的元素 值写到相应的索引 [{key1:value},{key2:value},]
file_writer=csv.writer(out_file,delimiter='\t',lineterminator='\n\n')#分隔符为\t 行终止字符为\n\n 默认分别为, \r\n

json#javascript object notation 纯文本
{"key":"value"}
import json
data_dict=json.loads(json_data)#json转dict
json_data=json.dumps(data_dict)#值只能是 dict list int float str bool None
__TRIX
关注
0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Python Office 自动化操作学习笔记 openpyxl PyPDF3 python-docx win32com.client csv json

# -*- coding: utf-8 -*-# Version: Python 3.9.5# Author: TRIX# Date: 2021-09-08 17:03:29# Use:office include: excel pdf word csv json目录：openpyxlPyPDF3python-docxwin32com.clientcsvjson#openpyxl excel表格处理import openpyxl#需要安装 pip install openpyxl
复制链接

扫一扫