Below are seven Python database programming examples drawn from real-world scenarios, covering data import, web applications, data analysis, automated operations, and more. Each case includes a complete code implementation and a scenario description:
Case 1: Bulk-importing CSV data into MySQL
Scenario: Import e-commerce order records from a CSV file into a MySQL database in bulk
python
import csv
import mysql.connector
from mysql.connector import Error

def import_csv_to_mysql(csv_file, table_name):
    connection = None
    try:
        # Open the database connection
        connection = mysql.connector.connect(
            host="localhost",
            user="root",
            password="password",
            database="ecommerce"
        )
        cursor = connection.cursor()
        # Read the CSV file
        with open(csv_file, 'r', encoding='utf-8') as file:
            csv_data = csv.reader(file)
            headers = next(csv_data)  # header row
            # Build the INSERT statement dynamically from the header row
            # (table and column names must come from trusted input, not user data)
            placeholders = ', '.join(['%s'] * len(headers))
            columns = ', '.join(headers)
            insert_query = f"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})"
            # Insert all rows in a single batch
            rows = list(csv_data)
            cursor.executemany(insert_query, rows)
        connection.commit()
        print(f"Imported {len(rows)} records successfully")
    except Error as e:
        print(f"Error: {e}")
    finally:
        if connection is not None and connection.is_connected():
            cursor.close()
            connection.close()

# Usage example
import_csv_to_mysql('orders.csv', 'orders')
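If pandas is available, the same import can also be done in a few lines and in memory-friendly chunks. This is a minimal sketch, assuming the same ecommerce database and orders table; the connection string is an assumption:
python
import pandas as pd
from sqlalchemy import create_engine

# SQLAlchemy engine for the same MySQL database used above (credentials assumed)
engine = create_engine("mysql+mysqlconnector://root:password@localhost/ecommerce")

# Read the CSV in chunks and append each chunk to the orders table
for chunk in pd.read_csv("orders.csv", chunksize=10_000):
    chunk.to_sql("orders", engine, if_exists="append", index=False)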
Case 2: Database interaction in a Flask web application
Scenario: Build a simple book management system with CRUD operations
python
from flask import Flask, request, jsonify
from flask_sqlalchemy import SQLAlchemy

app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///books.db'
db = SQLAlchemy(app)

# Define the data model
class Book(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.String(100), nullable=False)
    author = db.Column(db.String(100))
    year = db.Column(db.Integer)

# Create all tables
with app.app_context():
    db.create_all()

# List all books
@app.route('/books', methods=['GET'])
def get_books():
    books = Book.query.all()
    return jsonify([{'id': book.id, 'title': book.title, 'author': book.author, 'year': book.year}
                    for book in books])

# Add a book
@app.route('/books', methods=['POST'])
def add_book():
    data = request.get_json()
    new_book = Book(title=data['title'], author=data.get('author'), year=data.get('year'))
    db.session.add(new_book)
    db.session.commit()
    return jsonify({'message': 'Book added successfully'}), 201

if __name__ == '__main__':
    app.run(debug=True)
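The listing above only implements the create and read operations. A minimal sketch of the remaining update and delete routes, assuming the same Book model (the route shapes and field handling are my own, not part of the original; these would go above the `if __name__ == '__main__'` block):
python
# Update a book by id
@app.route('/books/<int:book_id>', methods=['PUT'])
def update_book(book_id):
    book = Book.query.get_or_404(book_id)
    data = request.get_json()
    book.title = data.get('title', book.title)
    book.author = data.get('author', book.author)
    book.year = data.get('year', book.year)
    db.session.commit()
    return jsonify({'message': 'Book updated successfully'})

# Delete a book by id
@app.route('/books/<int:book_id>', methods=['DELETE'])
def delete_book(book_id):
    book = Book.query.get_or_404(book_id)
    db.session.delete(book)
    db.session.commit()
    return jsonify({'message': 'Book deleted successfully'})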
Case 3: Data analysis and visualization (SQLite + Matplotlib)
Scenario: Analyze sales data and generate a monthly sales trend chart
python
import sqlite3
import matplotlib.pyplot as plt

# Connect to the database
conn = sqlite3.connect('sales_data.db')
cursor = conn.cursor()

# Query monthly sales totals
query = """
SELECT
    strftime('%Y-%m', order_date) AS month,
    SUM(amount) AS total_sales
FROM orders
GROUP BY month
ORDER BY month
"""
cursor.execute(query)
results = cursor.fetchall()

# Unpack the result rows
months = [row[0] for row in results]
sales = [row[1] for row in results]

# Plot the trend
plt.figure(figsize=(12, 6))
plt.bar(months, sales, color='skyblue')
plt.plot(months, sales, 'r-', marker='o')
plt.title('Monthly Sales Trend')
plt.xlabel('Month')
plt.ylabel('Total Sales ($)')
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('sales_trend.png')
plt.show()

conn.close()
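If the analysis grows beyond a single chart, pulling the query result into a pandas DataFrame is often more convenient. A small sketch, assuming the same sales_data.db schema as above:
python
import sqlite3
import pandas as pd

# Load the monthly aggregation directly into a DataFrame
conn = sqlite3.connect('sales_data.db')
df = pd.read_sql_query(
    "SELECT strftime('%Y-%m', order_date) AS month, SUM(amount) AS total_sales "
    "FROM orders GROUP BY month ORDER BY month",
    conn
)
conn.close()

print(df.describe())  # quick summary statistics
df.plot(x='month', y='total_sales', kind='bar', legend=False)  # same chart via pandas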
Case 4: Scheduled database backup script
Scenario: Automatically back up a MySQL database to a target directory
python
import os
import subprocess
from datetime import datetime

def backup_mysql_database(host, user, password, database, backup_dir):
    # Create the backup directory if it does not exist
    os.makedirs(backup_dir, exist_ok=True)
    # Build a timestamped backup file name
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_file = f"{backup_dir}/{database}_{timestamp}.sql"
    # Build the mysqldump command (options go before the database name)
    cmd = [
        'mysqldump',
        f'--host={host}',
        f'--user={user}',
        f'--password={password}',
        f'--result-file={backup_file}',
        database
    ]
    try:
        # Run the dump
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        print(f"Database backup succeeded: {backup_file}")
        # Compress the backup file
        subprocess.run(['gzip', backup_file], check=True)
        print("Backup file compressed")
    except subprocess.CalledProcessError as e:
        print(f"Backup failed: {e.stderr.decode() if e.stderr else e}")

# Usage example (sensitive values are better read from environment variables)
backup_mysql_database(
    host='localhost',
    user='root',
    password='password',
    database='mydb',
    backup_dir='/path/to/backups'
)
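As the comment above suggests, credentials are better read from the environment than hard-coded. A minimal sketch; the variable names are assumptions:
python
import os

backup_mysql_database(
    host=os.environ.get('MYSQL_HOST', 'localhost'),
    user=os.environ['MYSQL_USER'],          # assumed variable names
    password=os.environ['MYSQL_PASSWORD'],
    database=os.environ['MYSQL_DATABASE'],
    backup_dir=os.environ.get('BACKUP_DIR', '/path/to/backups')
)
The scheduling itself is usually left to the operating system, for example a crontab entry that runs this script nightly.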
Case 5: Multi-threaded web scraper with database storage
Scenario: Scrape product information from a website and store it in a PostgreSQL database
python
import requests
from bs4 import BeautifulSoup
import psycopg2
from concurrent.futures import ThreadPoolExecutor
import threading

# Database connection settings
db_config = {
    'host': 'localhost',
    'database': 'products',
    'user': 'user',
    'password': 'password'
}

# One database connection per worker thread, kept in thread-local storage
thread_local = threading.local()

def get_db_connection():
    if not hasattr(thread_local, 'connection'):
        thread_local.connection = psycopg2.connect(**db_config)
    return thread_local.connection

# Scrape a single page
def scrape_page(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Extract the product name and price from each listing
        products = []
        for item in soup.select('.product-item'):
            name = item.select_one('.product-name').text.strip()
            price = float(item.select_one('.product-price').text.replace('$', ''))
            products.append((name, price))
        # Store the results
        conn = get_db_connection()
        with conn.cursor() as cursor:
            for product in products:
                cursor.execute(
                    "INSERT INTO products (name, price) VALUES (%s, %s) ON CONFLICT DO NOTHING",
                    product
                )
        conn.commit()
        print(f"Scraped {len(products)} products from {url}")
    except (requests.RequestException, psycopg2.Error) as e:
        print(f"Scrape failed for {url}: {e}")

def main():
    base_url = "https://example.com/products?page="
    total_pages = 10
    # Create the table if it does not exist
    conn = psycopg2.connect(**db_config)
    with conn.cursor() as cursor:
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS products (
                id SERIAL PRIMARY KEY,
                name TEXT NOT NULL UNIQUE,  -- UNIQUE so ON CONFLICT DO NOTHING can skip duplicates
                price NUMERIC(10, 2) NOT NULL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
    conn.commit()
    conn.close()
    # Scrape the pages concurrently
    with ThreadPoolExecutor(max_workers=5) as executor:
        urls = [f"{base_url}{page}" for page in range(1, total_pages + 1)]
        executor.map(scrape_page, urls)

if __name__ == "__main__":
    main()
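The thread-local connections above are simple, but they are never closed and their number grows with the worker count. The same storage step could instead borrow connections from psycopg2's built-in pool; a sketch under the same db_config, with the pool size and usage pattern as assumptions:
python
from psycopg2 import pool

# Shared pool sized to match the executor's worker count
connection_pool = pool.ThreadedConnectionPool(minconn=1, maxconn=5, **db_config)

def store_products(products):
    conn = connection_pool.getconn()   # borrow a connection
    try:
        with conn.cursor() as cursor:
            cursor.executemany(
                "INSERT INTO products (name, price) VALUES (%s, %s) ON CONFLICT DO NOTHING",
                products
            )
        conn.commit()
    finally:
        connection_pool.putconn(conn)  # always return it to the pool

# connection_pool.closeall() once all scraping threads have finished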
Case 6: Database query performance optimization tool
Scenario: Analyze SQL query performance and generate optimization suggestions
python
import time
from sqlalchemy import create_engine, text

def analyze_query_performance(database_url, query, params=None):
    """Measure a query's execution time and produce basic optimization suggestions."""
    engine = create_engine(database_url)
    # Run the query and time it
    start_time = time.time()
    with engine.connect() as conn:
        result = conn.execute(text(query), params or {})
        rows = result.fetchall()
    execution_time = time.time() - start_time
    print(f"Query execution time: {execution_time:.4f} s")
    print(f"Rows returned: {len(rows)}")
    # Generate suggestions (heuristic examples only; a real tool would inspect EXPLAIN output)
    suggestions = []
    query_upper = query.upper()
    # Possible missing indexes
    if "WHERE" in query_upper and "JOIN" in query_upper:
        suggestions.append("Check that the columns in the WHERE and JOIN clauses are indexed")
    # Possible full-table scans
    if "SELECT *" in query_upper and len(rows) > 1000:
        suggestions.append("Avoid SELECT *; select only the columns you need")
    # Large-offset pagination
    if "LIMIT" in query_upper and "OFFSET" in query_upper:
        offset = int(query_upper.split("OFFSET")[1].split()[0])  # naive parse of the OFFSET value
        if offset > 1000:
            suggestions.append("Large-offset pagination is slow; use keyset (seek) pagination or a covering index")
    return {
        "execution_time": execution_time,
        "row_count": len(rows),
        "suggestions": suggestions
    }

# Usage example
database_url = "sqlite:///mydatabase.db"
query = "SELECT * FROM orders WHERE customer_id = :customer_id AND order_date > :date LIMIT 100"
params = {"customer_id": 123, "date": "2023-01-01"}
analysis = analyze_query_performance(database_url, query, params)
print("\nOptimization suggestions:")
for i, suggestion in enumerate(analysis["suggestions"], 1):
    print(f"{i}. {suggestion}")
Case 7: Database data synchronization tool
Scenario: Sync MySQL data to Elasticsearch in real time via the binlog
python
from pymysqlreplication import BinLogStreamReader
from pymysqlreplication.row_event import WriteRowsEvent, UpdateRowsEvent, DeleteRowsEvent
from elasticsearch import Elasticsearch

# MySQL replication settings (the user needs replication privileges)
mysql_config = {
    'host': 'localhost',
    'port': 3306,
    'user': 'repl_user',
    'password': 'repl_password',
    'db': 'mydatabase',
    'charset': 'utf8mb4'
}

# Elasticsearch client (newer elasticsearch-py versions use document= instead of body= below)
es = Elasticsearch("http://localhost:9200")

def sync_to_elasticsearch():
    # Open the binlog stream
    stream = BinLogStreamReader(
        connection_settings=mysql_config,
        server_id=100,
        blocking=True,
        resume_stream=True,
        only_events=[WriteRowsEvent, UpdateRowsEvent, DeleteRowsEvent]
    )
    try:
        for binlog_event in stream:
            for row in binlog_event.rows:
                if isinstance(binlog_event, WriteRowsEvent):
                    # Insert
                    document = row['values']
                    es.index(index=binlog_event.table, id=document.get('id'), body=document)
                    print(f"Indexed: {document}")
                elif isinstance(binlog_event, UpdateRowsEvent):
                    # Update
                    document = row['after_values']
                    es.index(index=binlog_event.table, id=document.get('id'), body=document)
                    print(f"Updated: {document}")
                elif isinstance(binlog_event, DeleteRowsEvent):
                    # Delete
                    document_id = row['values'].get('id')
                    es.delete(index=binlog_event.table, id=document_id)
                    print(f"Deleted: id={document_id}")
    finally:
        stream.close()

if __name__ == "__main__":
    sync_to_elasticsearch()
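Indexing one document per event keeps the example readable; at higher write rates the events could be buffered and flushed with the client's bulk helper. This is a sketch only, with the buffer size and flush policy as assumptions:
python
from elasticsearch import helpers

BULK_SIZE = 500
buffer = []

def queue_index(index_name, document):
    """Buffer an index action and flush once BULK_SIZE actions have accumulated."""
    buffer.append({
        "_op_type": "index",
        "_index": index_name,
        "_id": document.get("id"),
        "_source": document,
    })
    if len(buffer) >= BULK_SIZE:
        helpers.bulk(es, buffer)  # send the whole batch in one request
        buffer.clear()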