Python Database Programming Examples

Below are seven Python database programming examples drawn from real-world scenarios, covering data import, web applications, data analysis, automated operations, and more. Each includes a complete code implementation and a description of the scenario:
Case 1: Bulk-Importing CSV Data into a MySQL Database
Scenario: batch-import an e-commerce order CSV file into a MySQL database
python
import csv
import mysql.connector
from mysql.connector import Error

def import_csv_to_mysql(csv_file, table_name):
    connection = None
    try:
        # Open the database connection
        connection = mysql.connector.connect(
            host="localhost",
            user="root",
            password="password",
            database="ecommerce"
        )
        
        cursor = connection.cursor()
        
        # Read the CSV file
        with open(csv_file, 'r', encoding='utf-8') as file:
            csv_data = csv.reader(file)
            headers = next(csv_data)  # the first row holds the column names
            
            # Build the INSERT statement dynamically from the header row
            # (table_name and headers are interpolated directly, so use trusted input only)
            placeholders = ', '.join(['%s'] * len(headers))
            columns = ', '.join(headers)
            insert_query = f"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})"
            
            # Insert all rows in a single batch
            rows = list(csv_data)
            cursor.executemany(insert_query, rows)
            
            connection.commit()
            print(f"Imported {len(rows)} records successfully")
    
    except Error as e:
        print(f"Error: {e}")
    finally:
        # Release resources whether or not the import succeeded
        if connection is not None and connection.is_connected():
            cursor.close()
            connection.close()

# Usage example
import_csv_to_mysql('orders.csv', 'orders')
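For large CSV files it is usually better to commit in fixed-size chunks so that memory use and transaction size stay bounded. Below is a minimal sketch of a chunked variant, assuming the same local MySQL instance and orders table as above (the 1000-row chunk size is an arbitrary choice):
python
import csv
import mysql.connector

def import_csv_in_chunks(csv_file, table_name, chunk_size=1000):
    # Assumes the same local MySQL instance and credentials as the example above
    connection = mysql.connector.connect(
        host="localhost", user="root", password="password", database="ecommerce"
    )
    cursor = connection.cursor()
    total = 0
    with open(csv_file, 'r', encoding='utf-8') as file:
        reader = csv.reader(file)
        headers = next(reader)
        placeholders = ', '.join(['%s'] * len(headers))
        query = f"INSERT INTO {table_name} ({', '.join(headers)}) VALUES ({placeholders})"
        
        chunk = []
        for row in reader:
            chunk.append(row)
            if len(chunk) >= chunk_size:
                cursor.executemany(query, chunk)  # one round trip per chunk
                connection.commit()               # commit per chunk, not per row
                total += len(chunk)
                chunk = []
        if chunk:                                 # flush the final partial chunk
            cursor.executemany(query, chunk)
            connection.commit()
            total += len(chunk)
    
    cursor.close()
    connection.close()
    print(f"Imported {total} records in chunks of {chunk_size}")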
Case 2: Database Interaction in a Flask Web Application
Scenario: build a simple book management system with CRUD operations (listing and creating books are shown here)
python
from flask import Flask, request, jsonify
from flask_sqlalchemy import SQLAlchemy

app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///books.db'
db = SQLAlchemy(app)

# Define the data model
class Book(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.String(100), nullable=False)
    author = db.Column(db.String(100))
    year = db.Column(db.Integer)

# Create all tables
with app.app_context():
    db.create_all()

# List all books
@app.route('/books', methods=['GET'])
def get_books():
    books = Book.query.all()
    return jsonify([{'id': book.id, 'title': book.title, 'author': book.author, 'year': book.year} for book in books])

# Add a new book
@app.route('/books', methods=['POST'])
def add_book():
    data = request.get_json()
    new_book = Book(title=data['title'], author=data.get('author'), year=data.get('year'))
    db.session.add(new_book)
    db.session.commit()
    return jsonify({'message': 'Book added successfully'})

if __name__ == '__main__':
    app.run(debug=True)
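A quick way to exercise both endpoints without a separate HTTP tool is Flask's built-in test client. A minimal sketch, assuming the code above is saved as a module named app.py (the module name is an assumption):
python
from app import app  # hypothetical module name for the Flask application above

client = app.test_client()

# Create a book
resp = client.post('/books', json={'title': 'Fluent Python', 'author': 'Luciano Ramalho', 'year': 2022})
print(resp.get_json())  # {'message': 'Book added successfully'}

# List all books
resp = client.get('/books')
print(resp.get_json())  # [{'id': 1, 'title': 'Fluent Python', ...}]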
Case 3: Data Analysis and Visualization (SQLite + Matplotlib)
Scenario: analyze sales data and generate a monthly sales trend chart
python
import sqlite3
import matplotlib.pyplot as plt

# Connect to the database
conn = sqlite3.connect('sales_data.db')
cursor = conn.cursor()

# Query monthly sales totals
query = """
SELECT 
    strftime('%Y-%m', order_date) as month,
    SUM(amount) as total_sales
FROM 
    orders
GROUP BY 
    month
ORDER BY 
    month
"""

cursor.execute(query)
results = cursor.fetchall()

# Unpack the result rows
months = [row[0] for row in results]
sales = [row[1] for row in results]

# Plot the monthly trend
plt.figure(figsize=(12, 6))
plt.bar(months, sales, color='skyblue')
plt.plot(months, sales, 'r-', marker='o')
plt.title('Monthly Sales Trend')
plt.xlabel('Month')
plt.ylabel('Total Sales ($)')
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('sales_trend.png')
plt.show()

conn.close()
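The same aggregation can also be loaded straight into a DataFrame, which is convenient once the analysis goes beyond a single chart. A minimal sketch assuming the same sales_data.db file and orders table:
python
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt

conn = sqlite3.connect('sales_data.db')

# Same monthly aggregation, returned as a DataFrame
df = pd.read_sql_query(
    """
    SELECT strftime('%Y-%m', order_date) AS month,
           SUM(amount) AS total_sales
    FROM orders
    GROUP BY month
    ORDER BY month
    """,
    conn,
)
conn.close()

print(df.describe())  # quick summary statistics
df.plot(x='month', y='total_sales', kind='bar', legend=False)
plt.tight_layout()
plt.show()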
Case 4: Scheduled Database Backup Script
Scenario: automatically back up a MySQL database to a specified directory
python
import os
import subprocess
from datetime import datetime

def backup_mysql_database(host, user, password, database, backup_dir):
    # Create the backup directory if it does not exist
    if not os.path.exists(backup_dir):
        os.makedirs(backup_dir)
    
    # Build a timestamped backup file name
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_file = os.path.join(backup_dir, f"{database}_{timestamp}.sql")
    
    # Assemble the mysqldump command (options before the database name)
    cmd = [
        'mysqldump',
        f'--host={host}',
        f'--user={user}',
        f'--password={password}',
        f'--result-file={backup_file}',
        database
    ]
    
    try:
        # Run the dump
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        print(f"Database backup succeeded: {backup_file}")
        
        # Compress the dump file
        subprocess.run(['gzip', backup_file], check=True, stderr=subprocess.PIPE)
        print("Backup file compressed")
        
    except subprocess.CalledProcessError as e:
        detail = e.stderr.decode() if e.stderr else str(e)
        print(f"Backup failed: {detail}")

# Usage example (read credentials from environment variables in production)
backup_mysql_database(
    host='localhost',
    user='root',
    password='password',
    database='mydb',
    backup_dir='/path/to/backups'
)
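The script above performs a single backup per invocation; the "scheduled" part is usually handled by cron (for example, a daily crontab entry that runs the script) or, to stay entirely in Python, by the third-party schedule package. A minimal sketch assuming the function above is importable and a daily 02:00 backup is wanted (both are assumptions):
python
import time
import schedule  # third-party package: pip install schedule

def run_backup():
    backup_mysql_database(
        host='localhost',
        user='root',
        password='password',
        database='mydb',
        backup_dir='/path/to/backups'
    )

# Run the backup every day at 02:00 local time
schedule.every().day.at("02:00").do(run_backup)

while True:
    schedule.run_pending()  # execute any job that is due
    time.sleep(60)          # check once a minute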
Case 5: Multithreaded Scraper with Database Storage
Scenario: scrape product information from a website and store it in a PostgreSQL database
python
import requests
from bs4 import BeautifulSoup
import psycopg2
from psycopg2 import Error
from concurrent.futures import ThreadPoolExecutor
import threading

# Database connection settings
db_config = {
    'host': 'localhost',
    'database': 'products',
    'user': 'user',
    'password': 'password'
}

# One database connection per worker thread, kept in thread-local storage
thread_local = threading.local()

def get_db_connection():
    if not hasattr(thread_local, 'connection'):
        thread_local.connection = psycopg2.connect(**db_config)
    return thread_local.connection

# Scrape a single page and store its products
def scrape_page(url):
    try:
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.text, 'html.parser')
        
        # Extract product information
        products = []
        for item in soup.select('.product-item'):
            name = item.select_one('.product-name').text.strip()
            price = float(item.select_one('.product-price').text.replace('$', ''))
            products.append((name, price))
        
        # Store the results in the database
        conn = get_db_connection()
        with conn.cursor() as cursor:
            for product in products:
                cursor.execute(
                    "INSERT INTO products (name, price) VALUES (%s, %s) ON CONFLICT (name) DO NOTHING",
                    product
                )
            conn.commit()
            
        print(f"成功爬取 {len(products)} 个商品: {url}")
        
    except Error as e:
        print(f"爬取失败: {e}")

# Main entry point
def main():
    base_url = "https://example.com/products?page="
    total_pages = 10
    
    # Create the table if it does not exist (a UNIQUE name makes ON CONFLICT meaningful)
    conn = psycopg2.connect(**db_config)
    with conn.cursor() as cursor:
        cursor.execute("""
        CREATE TABLE IF NOT EXISTS products (
            id SERIAL PRIMARY KEY,
            name TEXT NOT NULL UNIQUE,
            price NUMERIC(10, 2) NOT NULL,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        """)
        conn.commit()
    conn.close()
    
    # Scrape pages concurrently
    with ThreadPoolExecutor(max_workers=5) as executor:
        urls = [f"{base_url}{page}" for page in range(1, total_pages + 1)]
        executor.map(scrape_page, urls)

if __name__ == "__main__":
    main()
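If a real connection pool is preferred over one connection per thread, psycopg2 ships psycopg2.pool.ThreadedConnectionPool. A minimal sketch of a pooled variant of the storage step, assuming the same db_config and the 5-worker thread pool used above (store_products is a hypothetical helper):
python
from psycopg2.pool import ThreadedConnectionPool

# A shared pool sized to match the 5 worker threads above
pool = ThreadedConnectionPool(minconn=1, maxconn=5, **db_config)

def store_products(products):
    conn = pool.getconn()  # borrow a connection from the pool
    try:
        with conn.cursor() as cursor:
            cursor.executemany(
                "INSERT INTO products (name, price) VALUES (%s, %s) ON CONFLICT (name) DO NOTHING",
                products
            )
        conn.commit()
    finally:
        pool.putconn(conn)  # always return the connection to the pool

# When the program is finished:
# pool.closeall()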
Case 6: Database Query Performance Optimization Tool
Scenario: analyze SQL query performance and generate optimization suggestions
python
import time
from sqlalchemy import create_engine, text

def analyze_query_performance(database_url, query, params=None):
    """Time a SQL query and produce rough optimization hints."""
    engine = create_engine(database_url)
    
    # Execute the query and measure wall-clock time
    start_time = time.time()
    with engine.connect() as conn:
        result = conn.execute(text(query), params or {})
        rows = result.fetchall()
    execution_time = time.time() - start_time
    
    print(f"Query execution time: {execution_time:.4f} s")
    print(f"Rows returned: {len(rows)}")
    
    # Generate suggestions (simple heuristics; real tuning should be based on EXPLAIN output)
    suggestions = []
    
    # Possibly missing indexes
    if "WHERE" in query and "JOIN" in query:
        suggestions.append("Check whether the columns in the WHERE and JOIN clauses are indexed")
    
    # Possibly an over-wide result set
    if "SELECT *" in query and len(rows) > 1000:
        suggestions.append("Avoid SELECT *; select only the columns you need")
    
    # Large-offset pagination
    if "LIMIT" in query and "OFFSET" in query:
        try:
            offset = int(query.split("OFFSET")[1].split()[0])
        except (ValueError, IndexError):
            offset = 0  # OFFSET uses a bind parameter or has no literal value
        if offset > 1000:
            suggestions.append("Large OFFSET pagination is slow; use keyset (seek) pagination or a covering index")
    
    return {
        "execution_time": execution_time,
        "row_count": len(rows),
        "suggestions": suggestions
    }

# Usage example
database_url = "sqlite:///mydatabase.db"
query = "SELECT * FROM orders WHERE customer_id = :customer_id AND order_date > :date LIMIT 100"
params = {"customer_id": 123, "date": "2023-01-01"}

analysis = analyze_query_performance(database_url, query, params)

print("\n优化建议:")
for i, suggestion in enumerate(analysis["suggestions"], 1):
    print(f"{i}. {suggestion}")
Case 7: Database Data Synchronization Tool
Scenario: sync MySQL data to Elasticsearch in real time via the binlog
python
from pymysqlreplication import BinLogStreamReader
from pymysqlreplication.row_event import WriteRowsEvent, UpdateRowsEvent, DeleteRowsEvent
from elasticsearch import Elasticsearch

# MySQL connection settings (the user needs replication privileges)
mysql_config = {
    'host': 'localhost',
    'port': 3306,
    'user': 'repl_user',
    'password': 'repl_password',
    'db': 'mydatabase',
    'charset': 'utf8mb4'
}

# Elasticsearch client (recent client versions expect a full URL)
es = Elasticsearch("http://localhost:9200")

def sync_to_elasticsearch():
    # Open a binlog stream and follow row-level change events
    stream = BinLogStreamReader(
        connection_settings=mysql_config,
        server_id=100,
        blocking=True,
        resume_stream=True,
        only_events=[WriteRowsEvent, UpdateRowsEvent, DeleteRowsEvent]
    )
    
    for binlog_event in stream:
        for row in binlog_event.rows:
            if isinstance(binlog_event, WriteRowsEvent):
                # INSERT: index the new row
                document = row['values']
                es.index(index=binlog_event.table, id=document.get('id'), body=document)
                print(f"插入: {document}")
            
            elif isinstance(binlog_event, UpdateRowsEvent):
                # UPDATE: re-index the row with its new values
                document = row['after_values']
                es.index(index=binlog_event.table, id=document.get('id'), body=document)
                print(f"更新: {document}")
            
            elif isinstance(binlog_event, DeleteRowsEvent):
                # DELETE: remove the document from the index
                document_id = row['values'].get('id')
                es.delete(index=binlog_event.table, id=document_id)
                print(f"删除: ID={document_id}")
    
    stream.close()

if __name__ == "__main__":
    sync_to_elasticsearch()
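The binlog stream only captures changes made after it starts, so existing rows still need a one-off initial load. A minimal sketch of that step using elasticsearch.helpers.bulk (the users table and its id column are assumptions for illustration):
python
import pymysql
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

es = Elasticsearch("http://localhost:9200")

def initial_full_sync(table):
    # Read every existing row from MySQL (pymysql is already a dependency of pymysqlreplication)
    conn = pymysql.connect(host='localhost', user='repl_user', password='repl_password',
                           database='mydatabase', charset='utf8mb4',
                           cursorclass=pymysql.cursors.DictCursor)
    with conn.cursor() as cursor:
        cursor.execute(f"SELECT * FROM {table}")
        rows = cursor.fetchall()
    conn.close()
    
    # One bulk request instead of one index() call per row
    actions = (
        {"_index": table, "_id": row["id"], "_source": row}
        for row in rows
    )
    success, _ = bulk(es, actions)
    print(f"Bulk-indexed {success} documents from {table}")

initial_full_sync('users')  # 'users' is a hypothetical table name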
 
