一、知识衔接:从数据存储到自动化输出
前六篇核心能力:
✅ 异常处理 ✅ JSON持久化 ✅ 面向对象设计 ✅ 健壮程序开发
本篇新维度:
graph LR
A[日记本系统] --> B[数据导出]
B --> C[Excel报表]
B --> D[Word总结]
B --> E[PDF报告]
二、环境准备:安装必备库
1. 安装第三方库(新手避坑版)
# Excel library
pip install openpyxl -i https://pypi.tuna.tsinghua.edu.cn/simple
# Word library
pip install python-docx -i https://pypi.tuna.tsinghua.edu.cn/simple
# PDF read/encrypt library
pip install PyPDF2 -i https://pypi.tuna.tsinghua.edu.cn/simple
# PDF generation library (imported by the text-to-PDF example as reportlab.pdfgen)
pip install reportlab -i https://pypi.tuna.tsinghua.edu.cn/simple
2. 验证安装成功
# Smoke test: every third-party package used in this article must import cleanly.
import openpyxl
from docx import Document
import PyPDF2
import reportlab  # used by the text-to-PDF example; install with: pip install reportlab
print("所有库导入成功!")
三、Excel自动化:生成日记统计报表
1. 创建Excel工作簿
from openpyxl import Workbook
def create_excel_report(data, filename="diary_report.xlsx"):
    """Write one spreadsheet row per diary entry and save the workbook.

    Args:
        data: Mapping with a ``"diaries"`` list; each entry must provide a
            ``"time"`` string (its first 10 characters are used as the date)
            and a ``"content"`` string.
        filename: Destination ``.xlsx`` path. Defaults to the name the
            original tutorial hard-coded, so existing callers are unaffected.

    Raises:
        KeyError: If ``"diaries"``, ``"time"`` or ``"content"`` is missing.
    """
    wb = Workbook()
    ws = wb.active
    ws.title = "日记统计"

    # Header row.
    ws.append(["日期", "字数", "情绪", "关键词"])

    # One row per entry; mood/keyword columns are placeholders for later analysis.
    for entry in data["diaries"]:
        date = entry["time"][:10]
        # len() counts characters, which is the natural "word count" for Chinese text.
        word_count = len(entry["content"])
        ws.append([date, word_count, "待分析", ""])

    wb.save(filename)
    print("Excel报表已生成!")
2. 自动化格式设置
from openpyxl.styles import Font, Alignment
from openpyxl.utils import get_column_letter

# NOTE: `ws` is the worksheet being formatted. Styles only reach the file if
# the workbook is saved (wb.save(...)) after this code has run.

# Header style: bold Microsoft YaHei, horizontally centred.
title_font = Font(name='微软雅黑', bold=True)
header_alignment = Alignment(horizontal='center')

# Apply the style to every cell of the first row.
for cell in ws["1:1"]:
    cell.font = title_font
    cell.alignment = header_alignment

# Auto-fit each column to its longest rendered value.
for col in ws.columns:
    # Derive the letter from the numeric index: merged cells expose `.column`
    # but not `.column_letter`.
    column = get_column_letter(col[0].column)
    # Skip empty cells so they are not measured as the 4-character text "None".
    max_length = max(
        (len(str(cell.value)) for cell in col if cell.value is not None),
        default=0,
    )
    # Padding plus a 20% fudge factor, as in the original heuristic.
    adjusted_width = (max_length + 2) * 1.2
    ws.column_dimensions[column].width = adjusted_width
四、Word自动化:生成月度总结
1. 创建标准模板
from docx import Document
from docx.shared import Pt
def create_word_summary(data):
    """Build ``monthly_summary.docx`` with a total count and the last 5 entries.

    Args:
        data: Mapping with a ``"diaries"`` list; each entry must provide a
            ``"time"`` string and a ``"content"`` string.

    Raises:
        KeyError: If ``"diaries"``, ``"time"`` or ``"content"`` is missing.
    """
    summary_length = 15  # characters of content shown in the table
    doc = Document()

    # Title, centred (1 is the numeric value of the CENTER alignment).
    title = doc.add_heading('月度日记总结', level=0)
    title.alignment = 1

    # Statistics paragraph: bold label followed by the count.
    p = doc.add_paragraph()
    p.add_run("总日记数:").bold = True
    p.add_run(f"{len(data['diaries'])}篇\n")

    # Three-column table with a header row.
    table = doc.add_table(rows=1, cols=3)
    hdr_cells = table.rows[0].cells
    hdr_cells[0].text = '日期'
    hdr_cells[1].text = '字数'
    hdr_cells[2].text = '概要'

    # One row for each of the five most recent entries.
    for entry in data["diaries"][-5:]:
        content = entry["content"]
        row_cells = table.add_row().cells
        row_cells[0].text = entry["time"][:10]
        row_cells[1].text = str(len(content))
        # Only mark the summary with an ellipsis when text was actually cut off.
        if len(content) > summary_length:
            row_cells[2].text = content[:summary_length] + "..."
        else:
            row_cells[2].text = content

    doc.save("monthly_summary.docx")
    print("Word总结已生成!")
五、PDF自动化:创建安全报告
1. 文本转PDF
from reportlab.pdfgen import canvas
def text_to_pdf(filename, content):
    """Render plain text into a PDF, starting a new page when one fills up.

    Args:
        filename: Destination PDF path.
        content: Text to render; split on ``"\\n"``, one PDF line per piece.
            Lines are not wrapped.
    """
    # Imported locally so this function is self-contained.
    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.cidfonts import UnicodeCIDFont

    # Helvetica has no CJK glyphs, so Chinese text would be unreadable.
    # STSong-Light is a predefined Simplified Chinese CID font; the glyphs are
    # supplied by the PDF viewer's Asian font support.
    font_name = "STSong-Light"
    font_size = 12
    pdfmetrics.registerFont(UnicodeCIDFont(font_name))

    c = canvas.Canvas(filename)

    def start_text_block():
        # Canvas state (including the font) is forgotten after showPage(),
        # so every page's text object gets the font set explicitly.
        block = c.beginText(40, 750)  # top-left starting position
        block.setFont(font_name, font_size)
        return block

    text = start_text_block()
    for line in content.split('\n'):
        text.textLine(line)
        if text.getY() < 40:  # cursor reached the bottom margin: flush the page
            c.drawText(text)
            c.showPage()
            text = start_text_block()

    c.drawText(text)
    c.save()
    print("PDF报告已生成!")
2. PDF加密保护
from PyPDF2 import PdfWriter, PdfReader
def encrypt_pdf(input_path, password, output_path="encrypted_diary.pdf"):
    """Copy a PDF page by page and write a password-protected version.

    Args:
        input_path: Path of the PDF to protect.
        password: User password required to open the result; must be non-empty.
        output_path: Destination path. Defaults to the name the original
            tutorial hard-coded, so existing callers are unaffected.

    Raises:
        ValueError: If ``password`` is empty, which would otherwise produce an
            "encrypted" file that opens without any password.
    """
    if not password:
        raise ValueError("password must be a non-empty string")

    reader = PdfReader(input_path)
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)

    writer.encrypt(user_password=password, use_128bit=True)
    with open(output_path, "wb") as f:
        writer.write(f)
    print("PDF加密完成!")
六、综合实战:自动化报告系统
class ReportGenerator:
    """Generate the Excel, Word and encrypted-PDF reports for a diary system.

    The wrapped ``diary_system`` only needs a ``load_data()`` method that
    returns a mapping containing a ``"diaries"`` list.
    """

    def __init__(self, diary_system):
        self.system = diary_system

    def generate_reports(self, password="123456"):
        """Run all report generators; failures are reported, not raised.

        Args:
            password: Password for the encrypted PDF. The default keeps the
                tutorial's original value; NOTE(review): it is trivially
                guessable, so real callers should pass their own.
        """
        try:
            data = self.system.load_data()

            # Excel report.
            create_excel_report(data)

            # Word summary.
            create_word_summary(data)

            # PDF backup. The text is built in memory: a temp-file round trip
            # would depend on the locale encoding, could overwrite an existing
            # temp.txt, and needs cleanup.
            backup_text = (
                "日记安全备份\n\n"
                f"总日记数:{len(data['diaries'])}\n"
            )
            text_to_pdf("diary_backup.pdf", backup_text)
            encrypt_pdf("diary_backup.pdf", password)

            print("所有报告生成完成!")
        except Exception as e:
            # Top-level boundary: report the failure instead of crashing the menu.
            print(f"报告生成失败:{str(e)}")
            # TODO: also record the error in a log.
七、避坑指南
1. 常见格式问题解决方案
- Excel日期显示异常:设置单元格格式为日期格式
from datetime import date
from openpyxl.styles import numbers

# A number format only affects real date values, not text. Convert ISO-style
# strings ("YYYY-MM-DD") in column A to dates first, then apply the format.
for cell in ws['A'][1:]:  # skip the header row
    if isinstance(cell.value, str):
        try:
            cell.value = date.fromisoformat(cell.value)
        except ValueError:
            # Not an ISO date string: leave the text untouched.
            pass
    cell.number_format = numbers.FORMAT_DATE_YYYYMMDD2
- Word中文乱码:指定中文字体
# Give a run an explicit Chinese font. `p` is an existing paragraph.
from docx.oxml.ns import qn
run = p.add_run("中文内容")
# Set the font name first. NOTE(review): the next line dereferences rPr/rFonts,
# which presumably only exist once a font name has been assigned - confirm.
run.font.name = '微软雅黑'
# Also write the font into the w:eastAsia attribute of the run's rFonts element.
run._element.rPr.rFonts.set(qn('w:eastAsia'), '微软雅黑')
2. 文件路径安全处理
import os
def safe_path(filename):
    """Return ``filename`` with every disallowed character removed.

    Alphanumeric characters (including non-ASCII letters and digits) are kept,
    along with ``-``, ``_``, ``.``, ``(``, ``)``, space, the platform path
    separator and the extension separator. Leading and trailing whitespace is
    stripped from the result.
    """
    allowed_punctuation = frozenset("-_.() " + os.sep + os.extsep)
    kept = [ch for ch in filename if ch.isalnum() or ch in allowed_punctuation]
    return "".join(kept).strip()
学习路线建议
1. 今日实践任务
# 任务:将日记本系统与自动化报告结合
# 要求:
# - 添加菜单选项"生成报告"
# - 自动打包报告为ZIP文件
# - 添加异常处理防止文件占用错误
2. 效率技巧
- 使用 os.startfile() 自动打开生成的文件
# Open the generated report with the application associated with its file type.
import os
os.startfile("diary_report.xlsx") # Windows only
- 用 schedule 库定时生成日报(需先安装:pip install schedule)
import time

import schedule

# Register the daily job. `generate_reports` must be a zero-argument callable,
# e.g. the bound method ReportGenerator(diary_system).generate_reports.
schedule.every().day.at("18:00").do(generate_reports)

# schedule has no background thread: jobs only fire when run_pending() is
# called, so keep polling for as long as the program should stay active.
while True:
    schedule.run_pending()
    time.sleep(1)
📢 下期预告:Python网络爬虫入门(安全获取网页数据)
💡 学完本篇你将能:彻底告别手动处理文档,效率提升10倍!