用Python编写处理Excel、Word、PDF和PPT文件的自动化脚本非常实用,下面小编将分别介绍如何使用pandas、python-docx、PyPDF2和python-pptx等库来自动化处理这些文件类型。
【文末有惊喜福利🎁】
Excel自动化处理
1. 读取Excel文件
import pandas as pd

# Load example.xlsx into a DataFrame and show its first rows.
df = pd.read_excel('example.xlsx')
preview = df.head()
print(preview)
2. 写入Excel
# Write the DataFrame to a new workbook without the index column.
output_path = 'output.xlsx'
df.to_excel(output_path, index=False)
3. 合并多个Excel文件
import glob

import pandas as pd

# Merge every .xlsx workbook in the current directory into one workbook.
OUTPUT_FILE = 'merged.xlsx'

# Skip the output file itself so re-running the script does not fold a
# previous merge result back into the new one. Sorting makes the row order
# of the merged sheet reproducible (glob order is otherwise arbitrary).
source_files = sorted(f for f in glob.glob("*.xlsx") if f != OUTPUT_FILE)
all_dfs = [pd.read_excel(file) for file in source_files]

if all_dfs:
    # ignore_index=True renumbers the rows 0..n-1 across all inputs.
    combined_df = pd.concat(all_dfs, ignore_index=True)
    combined_df.to_excel(OUTPUT_FILE, index=False)
else:
    # pd.concat raises ValueError on an empty list, so report instead.
    print("No .xlsx files found to merge.")
Word自动化处理
4. 读取Word文档
from docx import Document

# Open example.docx and print the text of each paragraph in order.
doc = Document('example.docx')
for text in (para.text for para in doc.paragraphs):
    print(text)
5. 写入Word文档
from docx import Document

# Create a new document, add a single paragraph, and save it.
new_doc = Document()
new_doc.add_paragraph('Hello World!')
new_doc.save('hello.docx')
PDF自动化处理
6. 读取PDF文本内容
import PyPDF2

# Print the text of the first page of example.pdf.
# The file is opened in a `with` block so the handle is always closed,
# even when parsing or text extraction raises.
with open('example.pdf', 'rb') as pdf_file:
    read_pdf = PyPDF2.PdfReader(pdf_file)
    if len(read_pdf.pages) > 0:
        print(read_pdf.pages[0].extract_text())
    else:
        # Indexing pages[0] on a PDF without pages would raise IndexError.
        print('example.pdf contains no pages.')
7. 将PDF转换为文本文件
from PyPDF2 import PdfReader
import io  # not used below; kept because a later snippet calls io.BytesIO

# Extract the text of every page of example.pdf and write it to output.txt.
with open("example.pdf", "rb") as pdf_file:
    reader = PdfReader(pdf_file)
    # Extraction must happen while the file is still open. Joining the
    # pieces avoids repeated string concatenation, and `or ''` guards
    # against a page whose extraction yields no string.
    text = ''.join(page.extract_text() or '' for page in reader.pages)

with open("output.txt", "w", encoding="utf-8") as text_file:
    text_file.write(text)
读者福利:小编专门为对Python感兴趣的童鞋准备好了全套的Python学习资料
Python大礼包:《python安装工具&全套学习资料》免费分享(安全链接,放心点击)
PPT自动化处理
8. 读取PPT内容
from pptx import Presentation

# Walk every shape on every slide and print the text of those shapes
# that expose a `text` attribute.
prs = Presentation('example.pptx')
all_shapes = (shape for slide in prs.slides for shape in slide.shapes)
for shape in all_shapes:
    if not hasattr(shape, "text"):
        continue
    print(shape.text)
9. 创建新幻灯片并添加文本
from pptx import Presentation

# Start a new presentation, append one slide built from layout index 1,
# set its title text, and save the result.
prs = Presentation()
new_slide = prs.slides.add_slide(prs.slide_layouts[1])
new_slide.shapes.title.text = "Hello, PowerPoint!"
prs.save('test.pptx')
更多功能拓展
10. Excel数据筛选
# Keep only the rows whose 'Column Name' value equals 'Some Value'.
row_mask = df['Column Name'] == 'Some Value'
filtered_df = df[row_mask]
11. Word插入图片
from docx import Document
from docx.shared import Inches

# Insert image.png into a new document at a width of 1.25 inches.
picture_width = Inches(1.25)
doc = Document()
doc.add_picture('image.png', width=picture_width)
doc.save('document.docx')
12. PDF合并
from PyPDF2 import PdfReader, PdfWriter

# Concatenate the listed PDFs, in list order, into merged.pdf.
pdf_writer = PdfWriter()
for filename in ['file1.pdf', 'file2.pdf']:
    pdf_reader = PdfReader(filename)
    # Iterate the pages directly instead of indexing by position.
    for page in pdf_reader.pages:
        pdf_writer.add_page(page)

# Write once, after all inputs have been appended to the writer.
with open("merged.pdf", "wb") as out:
    pdf_writer.write(out)
13. PPT批量替换文本
from pptx import Presentation

# Replace 'old_text' with 'new_text' in every run of every shape that has
# a text frame, then save to a new file. The replacement is applied to each
# run separately, so a match that spans two runs is not replaced.
prs = Presentation('input.pptx')
all_shapes = (shape for slide in prs.slides for shape in slide.shapes)
for shape in all_shapes:
    if not hasattr(shape, "text_frame"):
        continue
    for paragraph in shape.text_frame.paragraphs:
        for run in paragraph.runs:
            run.text = run.text.replace('old_text', 'new_text')
prs.save('output.pptx')
14. Excel数据透视表
# Sum 'Sales' for each 'Category'. The aggregation is named by string so
# the snippet does not depend on numpy being imported as `np`.
pivot_table = pd.pivot_table(df, values='Sales', index=['Category'], aggfunc='sum')
15. Word文档标记为最终状态(注意:python-docx 不支持文档加密或密码保护)
from docx import Document

# NOTE: python-docx cannot encrypt or password-protect a document, and its
# core properties expose no `protection` field; assigning one would only set
# a plain Python attribute that is never written to the file. The snippet
# therefore only sets the real `content_status` core property, which is
# informational metadata and does not restrict editing.
doc = Document()
doc.add_paragraph('This is a protected document.')
doc.core_properties.content_status = 'Final'
doc.save('protected.docx')
16. PDF添加水印
from PyPDF2 import PdfReader, PdfWriter
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
def add_watermark(input_pdf_path, output_pdf_path, watermark_text):
    """Stamp ``watermark_text`` onto every page of a PDF.

    Args:
        input_pdf_path: Path of the PDF to read.
        output_pdf_path: Path the watermarked PDF is written to.
        watermark_text: Text drawn in grey 80-point Helvetica at
            position (100, 750) on a letter-sized overlay.
    """
    # Local import: the import lines of this snippet do not bring in `io`.
    import io

    # The overlay is identical for every page, so render it once into an
    # in-memory PDF instead of rebuilding it inside the page loop.
    packet = io.BytesIO()
    can = canvas.Canvas(packet, pagesize=letter)
    can.setFont("Helvetica", 80)
    can.setFillColorRGB(0.5, 0.5, 0.5)
    can.drawString(100, 750, watermark_text)
    can.save()
    # Rewind the BytesIO buffer so the reader starts at the first byte.
    packet.seek(0)
    watermark_reader = PdfReader(packet)
    watermark_page = watermark_reader.pages[0]

    pdf_writer = PdfWriter()
    pdf_reader = PdfReader(input_pdf_path)
    for page in pdf_reader.pages:
        # Merge the overlay onto the page, then queue the page for output.
        page.merge_page(watermark_page)
        pdf_writer.add_page(page)

    # Write out the merged PDF.
    with open(output_pdf_path, "wb") as output_stream:
        pdf_writer.write(output_stream)
# Example call: stamp 'CONFIDENTIAL' on original.pdf, writing watermarked.pdf.
add_watermark('original.pdf', 'watermarked.pdf', 'CONFIDENTIAL')
17. PPT自动生成目录幻灯片
from pptx import Presentation
from pptx.util import Inches

prs = Presentation('example.pptx')

# Append a slide built from layout 0 and title it as the table of contents.
toc_slide = prs.slides.add_slide(prs.slide_layouts[0])
toc_slide.shapes.title.text_frame.text = "Table of Contents"

# Assume the slide titles have already been collected; simplified here.
titles = ['Introduction', 'Section 1', 'Section 2']
bullet_layout = prs.slide_layouts[1]
for i, heading in enumerate(titles):
    # One slide per title: the heading goes in the title placeholder and a
    # "Slide N" paragraph is appended to the body placeholder.
    entry_slide = prs.slides.add_slide(bullet_layout)
    entry_slide.shapes.title.text = heading
    entry_body = entry_slide.shapes.placeholders[1]
    entry_body.text_frame.add_paragraph().text = f"Slide {i+2}"

prs.save('toc_presentation.pptx')
18. 数据图表生成(pandas + matplotlib)
import matplotlib.pyplot as plt
import pandas as pd

# Plot columns A and B as a bar chart and save the current figure to a PNG.
chart_data = {'A': [1, 2, 3, 4],
              'B': [2, 3, 4, 5]}
df = pd.DataFrame(chart_data)
df.plot.bar()
plt.savefig('chart.png')
19. Word表格创建
from docx import Document
from docx.table import Table

# Create a document with a one-row, three-column table and fill the row
# with the header labels.
doc = Document()
table = doc.add_table(rows=1, cols=3)
hdr_cells = table.rows[0].cells
for cell, label in zip(hdr_cells, ('Name', 'Age', 'City')):
    cell.text = label
doc.save('table.docx')
20. PDF提取图片
import os

from PyPDF2 import PdfReader
# Not referenced directly below.
# NOTE(review): page.images presumably needs Pillow installed - confirm.
from PIL import Image

# Save every embedded image of example.pdf to its own file.
with open("example.pdf", "rb") as pdf_file:
    pdf = PdfReader(pdf_file)
    for page_index, page in enumerate(pdf.pages):
        for image_index, image_file_object in enumerate(page.images):
            # Keep the extension reported for the image rather than always
            # writing .png; fall back to .png when the name has none.
            extension = os.path.splitext(image_file_object.name)[1] or '.png'
            # Both indices go into the name so that several images on the
            # same page do not overwrite each other.
            out_name = f'image_{page_index}_{image_index}{extension}'
            with open(out_name, 'wb') as fp:
                fp.write(image_file_object.data)
文末福利
如果你对Python感兴趣的话,可以试试我整理的这一份全套的Python学习资料,【点击这里】领取!
包括:Python激活码+安装包,以及Python Web开发、Python爬虫、Python数据分析、人工智能、自动化办公等学习教程。带你从零基础系统性地学好Python!
① Python所有方向的学习路线图,清楚各个方向要学什么东西
② 100多节Python课程视频,涵盖必备基础、爬虫和数据分析
③ 100多个Python实战案例,学习不再是只会理论
④ 华为出品独家Python漫画教程,手机也能学习