文章目录
一.Excel
1.xlrd读
安装xlrd:pip install xlrd==1.2.0(注意:xlrd 2.0及以上版本只支持.xls,不再支持读取.xlsx;本文示例读取的是.xlsx文件,因此需要安装1.2.0版本)
excel表:
(1)获取单元格内容
import xlrd

# Load the workbook from disk.
workbook = xlrd.open_workbook('测试表1.xlsx')

# Pick the worksheet to read. A sheet can be fetched either way:
#   by name:  workbook.sheet_by_name('人物信息表')
#   by index: workbook.sheet_by_index(0)
first_sheet = workbook.sheet_by_index(0)

# Three equivalent ways to read the value of the cell at row 0, column 0.
print(first_sheet.cell_value(0, 0))

top_left_cell = first_sheet.cell(0, 0)
print(top_left_cell.value)

first_row = first_sheet.row(0)
print(first_row[0].value)
运行结果:
(2)获取sheet名
import xlrd

# Load the workbook from disk.
workbook = xlrd.open_workbook('测试表1.xlsx')

# Two ways to print the name of every sheet.

# 1) Walk the sheets by index; workbook.nsheets is the number of sheets.
for index in range(workbook.nsheets):
    print(workbook.sheet_by_index(index).name)

# 2) Take the names directly from workbook.sheet_names().
for sheet_name in workbook.sheet_names():
    print(sheet_name)
运行结果:
2.xlwt写
安装xlwt:pip install xlwt
import xlwt

# Start from an empty workbook and give it a single sheet.
book = xlwt.Workbook()
first_sheet = book.add_sheet('new_test')

# Put the text 'test' into the cell at row 0, column 0.
row_index, col_index = 0, 0
first_sheet.write(row_index, col_index, 'test')

# Write the workbook to disk.
book.save('test.xls')
运行结果:
3.xlutils设置格式
安装xlutils:pip install xlutils
from xlutils.copy import copy
import xlrd
import xlwt

# Open the existing workbook, asking xlrd to keep the formatting information.
source_book = xlrd.open_workbook('test.xls', formatting_info=True)

# Make a writable copy of the workbook and take its first sheet.
writable_book = copy(source_book)
target_sheet = writable_book.get_sheet(0)

# Font: bold Microsoft YaHei. The height is 360, written here as 18 * 20
# (presumably 18 pt expressed in 1/20-point units -- confirm in xlwt docs).
title_font = xlwt.Font()
title_font.name = '微软雅黑'
title_font.bold = True
title_font.height = 18 * 20

# Borders: a thin line (xlwt.Borders.THIN) on all four sides.
thin_borders = xlwt.Borders()
for side in ('top', 'bottom', 'left', 'right'):
    setattr(thin_borders, side, xlwt.Borders.THIN)

# Alignment: centered horizontally and vertically.
centered = xlwt.Alignment()
centered.horz = xlwt.Alignment.HORZ_CENTER
centered.vert = xlwt.Alignment.VERT_CENTER

# Bundle font, borders and alignment into one cell style.
cell_style = xlwt.XFStyle()
cell_style.font = title_font
cell_style.borders = thin_borders
cell_style.alignment = centered

values = (11, 12, 13, 14)

# Rows 1-4 of column 1: values written without a style.
for offset, value in enumerate(values):
    target_sheet.write(1 + offset, 1, value)

# Rows 6-9 of column 1: the same values written with the style applied.
for offset, value in enumerate(values):
    target_sheet.write(6 + offset, 1, value, cell_style)

writable_book.save('填写.xls')
运行结果:
4.案例-统计每位同学总分
import xlrd
import xlwt

# Read the source workbook and use its first sheet.
xlsx = xlrd.open_workbook('学生成绩表.xlsx')
sheet = xlsx.sheet_by_index(0)

# Collect one record per data row. Row 0 is skipped; the values are taken
# from column 0 (stored as 'num'), column 1 ('name') and column 3 ('grade').
all_data = []
for row_i in range(1, sheet.nrows):
    all_data.append({
        'num': sheet.cell_value(row_i, 0),
        'name': sheet.cell_value(row_i, 1),
        'grade': sheet.cell_value(row_i, 3),
    })
print(all_data)

# Add up the grades per student number in a single pass over the records.
# Keying a dict by 'num' de-duplicates the students and (on Python 3.7+)
# keeps them in the order they first appear in the sheet, so the output
# rows are stable from run to run.
totals = {}
for student in all_data:
    entry = totals.get(student['num'])
    if entry is None:
        entry = {'num': student['num'], 'name': '', 'sum': 0}
        totals[student['num']] = entry
    entry['sum'] += student['grade']
    # As before, the name from the student's last record is the one kept.
    entry['name'] = student['name']
sum_list = list(totals.values())
print(sum_list)

# Write the totals to a new workbook with a single sheet.
new_workbook = xlwt.Workbook()
worksheet = new_workbook.add_sheet('总分成绩')

# Header row (row 0).
worksheet.write(0, 0, '学号')
worksheet.write(0, 1, '姓名')
worksheet.write(0, 2, '总分')

# One row per student, starting right below the header.
for row, total in enumerate(sum_list, start=1):
    worksheet.write(row, 0, total['num'])
    worksheet.write(row, 1, total['name'])
    worksheet.write(row, 2, total['sum'])

# Save to disk.
new_workbook.save('学生总分成绩表.xls')
运行结果:
5.更灵活的操作方式
import xlwt

book = xlwt.Workbook()
first_sheet = book.add_sheet('sheet1')

# Write the column index into row 0 for columns 0..299.
# NOTE(review): the surrounding article presents this snippet as one that
# fails, because xlwt refuses column indexes of 256 and above.
for column in range(300):
    first_sheet.write(0, column, column)

book.save('num.xls')
运行结果:
运行后xlwt会抛出异常:.xls格式的工作表最多只有256列(列索引0~255),而上面的代码要写到第300列
如果我们需要列超过256列,可以如下操作:
安装xlsxwriter:pip install xlsxwriter
# Plain values, no cell formatting.
import xlsxwriter as xw

# Using the workbook as a context manager closes it (which is what writes
# the file) even if one of the writes raises.
with xw.Workbook('number.xlsx') as workbook:
    sheet = workbook.add_worksheet('sheet1')
    # Write the column index into row 0 for columns 0..299.
    for col in range(300):
        sheet.write(0, col, col)
运行结果:
# NOTE(review): the original author labels this approach as having
# "unstable performance"; no measurement is given to back that up.
import openpyxl

book = openpyxl.load_workbook('number.xlsx')
target = book['sheet1']

# Put the strings '2', '3' and '4' into cells A2, A3 and A4.
for row_number in (2, 3, 4):
    target['A%d' % row_number] = str(row_number)

book.save('num_open.xlsx')
运行结果:
6.将文件夹整理到Excel中
import os
import xlwt

# Folder whose entries are to be listed.
file_path = 'd:/'

new_workbook = xlwt.Workbook()
sheet = new_workbook.add_sheet('new_dir')

# One entry name per row in column 0. The folder is listed exactly once;
# enumerate supplies the row number.
for n, entry_name in enumerate(os.listdir(file_path)):
    sheet.write(n, 0, entry_name)

new_workbook.save('dir.xls')
运行结果:
7.使用Excel画画
from PIL import Image
import xlsxwriter
# 颜色转换函数
def color(value):
    """Convert a colour between a channel tuple and a '#RRGGBB' string.

    Args:
        value: Either a tuple of integer channel values (each expected to
            be in 0-255), or a string whose characters 1-6 are hex digits,
            e.g. '#FF0010'. Hex digits may be upper or lower case.

    Returns:
        For a tuple: '#' followed by two upper-case hex digits per channel.
        For a string: a 3-tuple of ints decoded from characters 1-2, 3-4
        and 5-6. For any other type: None.

    Raises:
        IndexError: If a string is shorter than 7 characters, or a tuple
            channel is 256 or larger.
        ValueError: If a string holds a non-hex character in positions 1-6.
    """
    digit = list("0123456789ABCDEF")

    if isinstance(value, tuple):
        # High nibble then low nibble of each channel.
        return '#' + ''.join(
            digit[channel // 16] + digit[channel % 16] for channel in value
        )

    if isinstance(value, str):
        def channel_at(pos):
            # Upper-casing first makes '#ff00aa' decode like '#FF00AA'.
            high = digit.index(value[pos].upper())
            low = digit.index(value[pos + 1].upper())
            return high * 16 + low

        return (channel_at(1), channel_at(3), channel_at(5))

    return None
# Source image.
path = '3.png'
img = Image.open(path)
# Optional: shrink the image first, e.g. img = img.resize((189, 152))

# Convert to palette mode and back to RGB (presumably to limit the number
# of distinct colours -- confirm), then get pixel access and the size.
imgL = img.convert("P").convert("RGB")
pix = imgL.load()
w, h = imgL.size

workbook = xlsxwriter.Workbook('picture2.xlsx')  # new workbook
worksheet = workbook.add_worksheet('sheet1')  # new sheet named "sheet1"

# Create one format object per distinct colour instead of one per pixel,
# so the workbook does not accumulate w * h format objects.
formats = {}
for j in range(w):
    for i in range(h):
        color_cell = color(pix[j, i])
        sty = formats.get(color_cell)
        if sty is None:
            sty = workbook.add_format({'bg_color': '{}'.format(color_cell)})
            formats[color_cell] = sty
        # An empty cell whose background is the pixel colour.
        worksheet.write(i, j, '', sty)

# Row heights: set once per row rather than once per pixel.
for i in range(h):
    worksheet.set_row(i, 1)

# Column widths for every used column.
worksheet.set_column(0, w - 1, 0.1)
workbook.close()
运行结果:
二.Word
1.批量把文字写入word
安装:pip install python-docx
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Pt  # point sizes
from docx.oxml.ns import qn  # qualified XML names, needed for the East Asian font
# The imports above are the parts of the docx library used below.
import time


def _add_run(paragraph, text, font_name, size_pt=None, bold=None):
    """Append a run with `text` to `paragraph` and apply the given font settings."""
    run = paragraph.add_run(text)
    run.font.name = font_name
    # The East Asian font slot is set separately so Chinese text uses the font too.
    run.element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
    if size_pt is not None:
        run.font.size = Pt(size_pt)
    if bold is not None:
        run.font.bold = bold
    return run


price = input('请输入工资调整金额:')
# Names of all employees; one document is generated per name.
company_list = ['员工1', '员工2', '员工3', '员工4', '员工5', '员工6']
# Today's date. The year/month/day characters are inserted with str.format
# after strftime has run. Other possible formats:
#   time.strftime("%Y-%m-%d", time.localtime())
#   time.strftime("%Y/%m/%d", time.localtime())
today = time.strftime("%Y{y}%m{m}%d{d}", time.localtime()).format(y='年', m='月', d='日')

for i in company_list:
    document = Document()
    normal_style = document.styles['Normal']
    # Base font of the document (Western text) ...
    normal_style.font.name = u'宋体'
    # ... and the same font for East Asian (Chinese) text.
    normal_style.element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')

    # Paragraph 1: centered title (paragraphs are left-aligned by default).
    pl = document.add_paragraph()
    pl.alignment = WD_ALIGN_PARAGRAPH.CENTER
    _add_run(pl, '关于%s工资调整的通知' % today, '微软雅黑', size_pt=21, bold=True)
    # 5 pt of spacing before and after. Spacing lives on paragraph_format;
    # assigning space_before/space_after on the paragraph itself has no effect.
    pl.paragraph_format.space_before = Pt(5)
    pl.paragraph_format.space_after = Pt(5)

    # Paragraph 2: salutation addressed to the employee.
    p2 = document.add_paragraph()
    _add_run(p2, i + ":", '仿宋_GB2312', size_pt=16, bold=True)

    # Paragraph 3: body text containing the amount that was typed in.
    p3 = document.add_paragraph()
    _add_run(p3, " 因疫情影响,我们很抱歉的通知您,您的工资调整为每月%s元,特此通知." % price,
             '仿宋_GB2312', size_pt=14, bold=True)

    # Paragraph 4: right-aligned contact line.
    p4 = document.add_paragraph()
    p4.alignment = WD_ALIGN_PARAGRAPH.RIGHT
    _add_run(p4, "人事:源小姐 电话:18888888888", '仿宋_GB2312', size_pt=10, bold=True)

    document.save('%s-工资调整通知.docx' % i)
运行结果:
2.批量把图片和表格写入word
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Pt, Inches  # point sizes and inch lengths
from docx.oxml.ns import qn  # qualified XML names, needed for the East Asian font
# The imports above are the parts of the docx library used below.
import time


def _add_run(paragraph, text, font_name, size_pt=None, bold=None):
    """Append a run with `text` to `paragraph` and apply the given font settings."""
    run = paragraph.add_run(text)
    run.font.name = font_name
    # The East Asian font slot is set separately so Chinese text uses the font too.
    run.element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
    if size_pt is not None:
        run.font.size = Pt(size_pt)
    if bold is not None:
        run.font.bold = bold
    return run


price = input('请输入工资调整金额:')
# Names of all employees; one document is generated per name.
company_list = ['员工1', '员工2', '员工3', '员工4', '员工5', '员工6']
# Today's date. The year/month/day characters are inserted with str.format
# after strftime has run. Other possible formats:
#   time.strftime("%Y-%m-%d", time.localtime())
#   time.strftime("%Y/%m/%d", time.localtime())
today = time.strftime("%Y{y}%m{m}%d{d}", time.localtime()).format(y='年', m='月', d='日')

for i in company_list:
    document = Document()
    normal_style = document.styles['Normal']
    # Base font of the document (Western text) ...
    normal_style.font.name = u'宋体'
    # ... and the same font for East Asian (Chinese) text.
    normal_style.element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')

    # Letterhead picture at the top, 6 inches wide.
    document.add_picture('3.png', width=Inches(6))

    # Paragraph 1: centered title (paragraphs are left-aligned by default).
    pl = document.add_paragraph()
    pl.alignment = WD_ALIGN_PARAGRAPH.CENTER
    _add_run(pl, '关于%s工资调整的通知' % today, '微软雅黑', size_pt=21, bold=True)
    # 5 pt of spacing before and after. Spacing lives on paragraph_format;
    # assigning space_before/space_after on the paragraph itself has no effect.
    pl.paragraph_format.space_before = Pt(5)
    pl.paragraph_format.space_after = Pt(5)

    # Paragraph 2: salutation addressed to the employee.
    p2 = document.add_paragraph()
    _add_run(p2, i + ":", '仿宋_GB2312', size_pt=16, bold=True)

    # Paragraph 3: body text containing the amount that was typed in.
    p3 = document.add_paragraph()
    _add_run(p3, " 因疫情影响,我们很抱歉的通知您,您的工资调整为每月%s元,特此通知." % price,
             '仿宋_GB2312', size_pt=14, bold=True)

    # 2x2 table whose first row is merged into a single header cell.
    table = document.add_table(rows=2, cols=2, style='Table Grid')
    table.cell(0, 0).merge(table.cell(0, 1))
    header_paragraph = table.cell(0, 0).paragraphs[0]
    _add_run(header_paragraph, '签名栏', u'黑体')
    header_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    # Second row, first cell: the employee's name, centered.
    name_cell = table.cell(1, 0)
    name_cell.text = i
    name_cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Paragraph 4: right-aligned contact line.
    p4 = document.add_paragraph()
    p4.alignment = WD_ALIGN_PARAGRAPH.RIGHT
    _add_run(p4, "人事:源小姐 电话:18888888888", '仿宋_GB2312', size_pt=10, bold=True)

    document.save('%s-工资调整通知.docx' % i)
运行结果:
3.读取word
(1)读纯文本word
from docx import Document

# Open the document and print the text of each paragraph on its own line.
source_doc = Document('pure.docx')
for paragraph in source_doc.paragraphs:
    print(paragraph.text)
运行结果:
(2)读有图片和表格的word
先将word的后缀名.docx改为.zip,然后解压该zip,可以看到原来word是这样存储的
图片存放路径在/word/media/下,word样式为/word/document.xml
import zipfile
import xml.etree.ElementTree as ET

# Namespace of the "w:" prefix in word/document.xml; text lives in <w:t>.
W_NAMESPACE = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'

# A .docx file is a zip archive; read the main document part and close the
# archive again as soon as the bytes are in memory.
with zipfile.ZipFile('word_table.docx') as word_book:
    document_xml = word_book.read('word/document.xml')

# Parse the XML instead of splitting on the literal '<w:t>': this also
# catches text elements that carry attributes (e.g. xml:space="preserve")
# and turns entities such as &amp; back into characters.
root = ET.fromstring(document_xml)
text_list = [node.text or '' for node in root.iter('{%s}t' % W_NAMESPACE)]
# The individual text fragments, in document order.
print(text_list)

text = "".join(text_list)
print(text)
运行结果:
4.word转pdf
安装:pip install pywin32(win32com模块由pywin32包提供,pip上没有名为win32com的包);如果安装报错,可以尝试pip install pypiwin32
from win32com.client import constants, gencache

# Path of the Word file to convert.
doc_path = r'F:\PythonFile\PythonAuto\day02\word\test.docx'
# Path of the PDF file to create.
pdf_path = r'F:\PythonFile\PythonAuto\day02\word\test.pdf'

# Alternative way of getting the Word application object:
#   gencache.EnsureModule('{00020905-0000-0000-C000-000000000046}', 0, 8, 4)
#   word = Dispatch('Word.Application')
# The way used here:
word = gencache.EnsureDispatch('Word.Application')
try:
    # Open the document read-only and export it as PDF.
    doc = word.Documents.Open(doc_path, ReadOnly=1)
    doc.ExportAsFixedFormat(pdf_path,
                            constants.wdExportFormatPDF,
                            Item=constants.wdExportDocumentWithMarkup,
                            CreateBookmarks=constants.wdExportCreateHeadingBookmarks)
finally:
    # Quit Word even if opening or exporting failed, so that no Word
    # process is left running in the background.
    word.Quit(constants.wdDoNotSaveChanges)
运行结果:
5.word批量转pdf
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Pt  # point sizes
from docx.oxml.ns import qn  # qualified XML names, needed for the East Asian font
from win32com.client import Dispatch, constants, gencache
# The imports above are the parts of docx / win32com used below.
import os
import time


def _add_run(paragraph, text, font_name, size_pt=None, bold=None):
    """Append a run with `text` to `paragraph` and apply the given font settings."""
    run = paragraph.add_run(text)
    run.font.name = font_name
    # The East Asian font slot is set separately so Chinese text uses the font too.
    run.element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
    if size_pt is not None:
        run.font.size = Pt(size_pt)
    if bold is not None:
        run.font.bold = bold
    return run


price = input('请输入工资调整金额:')
# Names of all employees; one document is generated per name.
company_list = ['员工1', '员工2', '员工3', '员工4', '员工5', '员工6']
# Today's date. The year/month/day characters are inserted with str.format
# after strftime has run. Other possible formats:
#   time.strftime("%Y-%m-%d", time.localtime())
#   time.strftime("%Y/%m/%d", time.localtime())
today = time.strftime("%Y{y}%m{m}%d{d}", time.localtime()).format(y='年', m='月', d='日')

# Folder that receives both the .docx and the .pdf files. Saving and
# re-opening use the same absolute path, so the conversion no longer
# depends on the current working directory.
output_dir = r'F:\PythonFile\PythonAuto\day02\word'

# Start Word once for the whole batch instead of once per employee.
word = gencache.EnsureDispatch('Word.Application')
try:
    for i in company_list:
        document = Document()
        normal_style = document.styles['Normal']
        # Base font of the document (Western text) ...
        normal_style.font.name = u'宋体'
        # ... and the same font for East Asian (Chinese) text.
        normal_style.element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')

        # Paragraph 1: centered title (paragraphs are left-aligned by default).
        pl = document.add_paragraph()
        pl.alignment = WD_ALIGN_PARAGRAPH.CENTER
        _add_run(pl, '关于%s工资调整的通知' % today, '微软雅黑', size_pt=21, bold=True)
        # 5 pt of spacing before and after. Spacing lives on paragraph_format;
        # assigning space_before/space_after on the paragraph itself has no effect.
        pl.paragraph_format.space_before = Pt(5)
        pl.paragraph_format.space_after = Pt(5)

        # Paragraph 2: salutation addressed to the employee.
        p2 = document.add_paragraph()
        _add_run(p2, i + ":", '仿宋_GB2312', size_pt=16, bold=True)

        # Paragraph 3: body text containing the amount that was typed in.
        p3 = document.add_paragraph()
        _add_run(p3, " 因疫情影响,我们很抱歉的通知您,您的工资调整为每月%s元,特此通知." % price,
                 '仿宋_GB2312', size_pt=14, bold=True)

        # Paragraph 4: right-aligned contact line.
        p4 = document.add_paragraph()
        p4.alignment = WD_ALIGN_PARAGRAPH.RIGHT
        _add_run(p4, "人事:源小姐 电话:18888888888", '仿宋_GB2312', size_pt=10, bold=True)

        # Word file path and the PDF path generated from it.
        doc_path = os.path.join(output_dir, '%s-工资调整通知.docx' % i)
        pdf_path = os.path.join(output_dir, '%s-工资调整通知.pdf' % i)
        document.save(doc_path)

        # Open the saved document read-only and export it as PDF.
        doc = word.Documents.Open(doc_path, ReadOnly=1)
        try:
            doc.ExportAsFixedFormat(pdf_path,
                                    constants.wdExportFormatPDF,
                                    Item=constants.wdExportDocumentWithMarkup,
                                    CreateBookmarks=constants.wdExportCreateHeadingBookmarks)
        finally:
            # Close this document before moving on to the next employee.
            doc.Close(constants.wdDoNotSaveChanges)
finally:
    # Quit Word even if one of the conversions failed.
    word.Quit(constants.wdDoNotSaveChanges)
运行结果:
三.PDF
1.识别并读取PDF中的文字
安装:pip install pdfminer3k
from io import StringIO
from pdfminer.pdfinterp import PDFResourceManager, process_pdf
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams

# The with-statement closes the PDF file and the text buffer even if the
# extraction raises.
with open('诗.pdf', 'rb') as pdf_file, StringIO() as retstr:
    # Standard pdfminer plumbing: a TextConverter device that writes the
    # extracted text into the in-memory buffer.
    rsrcmgr = PDFResourceManager()
    laparams = LAParams()
    device = TextConverter(rsrcmgr=rsrcmgr, outfp=retstr, laparams=laparams)
    try:
        process_pdf(rsrcmgr=rsrcmgr, device=device, fp=pdf_file)
    finally:
        device.close()
    # Take the text out of the buffer before the buffer is closed.
    content = retstr.getvalue()

print(content)
运行结果:
四.PPT
1.在PPT中写入文字
安装:pip install python-pptx
(1)第一种方式写入:
from pptx import Presentation
from pptx.util import Inches, Pt

deck = Presentation()
# Append one slide based on layout index 1.
layout = deck.slide_layouts[1]
slide = deck.slides.add_slide(layout)

# Approach 1: address the placeholders by position and set their text.
placeholders = slide.shapes.placeholders
for position, caption in ((0, '这是占位符0'), (1, '这是占位符1')):
    placeholders[position].text = caption

deck.save('test.pptx')
运行结果:
(2)第二种方式写入:
from pptx import Presentation
from pptx.util import Inches, Pt

ppt = Presentation()
# Append one slide based on layout index 1.
slide = ppt.slides.add_slide(ppt.slide_layouts[1])

# Approach 2: set the title through slide.shapes.title ...
title_shape = slide.shapes.title
title_shape.text = '这是标题'
# ... and the second placeholder on the slide through its index.
subtitle = slide.shapes.placeholders[1]
subtitle.text = '这是文本框'

ppt.save('test.pptx')
运行结果:
(3)第三种方式写入:
from pptx import Presentation
from pptx.util import Inches, Pt

deck = Presentation()
# Append one slide based on layout index 1.
slide = deck.slides.add_slide(deck.slide_layouts[1])
placeholders = slide.shapes.placeholders

slide.shapes.title.text = '这是标题'

# Approach 3: append a paragraph to the text frame of the second
# placeholder and format it.
extra_paragraph = placeholders[1].text_frame.add_paragraph()
extra_paragraph.text = '新段落'
paragraph_font = extra_paragraph.font
paragraph_font.bold = True
paragraph_font.italic = True
paragraph_font.size = Pt(15)
paragraph_font.underline = True

deck.save('test.pptx')
运行结果:
(4)添加文本框,并在框中写入:
from pptx import Presentation
from pptx.util import Inches, Pt

ppt = Presentation()
# Append one slide based on layout index 1.
slide = ppt.slides.add_slide(ppt.slide_layouts[1])

title_shape = slide.shapes.title
title_shape.text = '这是标题'

# Position and size of the new text box.
left = Inches(2)
top = Inches(2)
width = Inches(3)
height = Inches(3)

# Add the text box and give it its first paragraph of text.
textbox = slide.shapes.add_textbox(left, top, width, height)
textbox.text = 'new textbox'

# Add a second paragraph inside the same text box.
new_para = textbox.text_frame.add_paragraph()
new_para.text = '第二段文字'

ppt.save('test.pptx')
运行结果:
2.在PPT中写入图片和表格
(1)写入图片
from pptx import Presentation
from pptx.util import Inches, Pt

deck = Presentation()
# Append one slide based on layout index 1.
slide = deck.slides.add_slide(deck.slide_layouts[1])

# Place the picture 1 inch from the left and 3 inches from the top,
# sized 5 x 3 inches.
picture_left, picture_top = Inches(1), Inches(3)
picture_width, picture_height = Inches(5), Inches(3)
img = slide.shapes.add_picture(
    '3.png', picture_left, picture_top, picture_width, picture_height
)

deck.save('test.pptx')
运行结果:
(2)写入表格
from pptx import Presentation
from pptx.util import Inches, Pt

deck = Presentation()
# Append one slide based on layout index 1.
slide = deck.slides.add_slide(deck.slide_layouts[1])

# A 2 x 2 table placed 1 inch from the left and top, sized 4 x 4 inches.
row_count, col_count = 2, 2
table_left, table_top = Inches(1), Inches(1)
table_width, table_height = Inches(4), Inches(4)
graphic_frame = slide.shapes.add_table(
    row_count, col_count, table_left, table_top, table_width, table_height
)
table = graphic_frame.table

# Column widths: 1 inch for the first column, 3 inches for the second.
for col_index, inches in enumerate((1, 3)):
    table.columns[col_index].width = Inches(inches)

# Fill each cell with its caption, row by row.
cell_texts = (('00', '01'), ('10', '11'))
for r, row_texts in enumerate(cell_texts):
    for c, caption in enumerate(row_texts):
        table.cell(r, c).text = caption

deck.save('test.pptx')
运行结果: