读取数据库数据,每10MB写入一个文件,最后将所有文件写入压缩包

该代码示例展示了一个Python脚本,它连接到MySQL数据库,执行查询以获取数据,然后将数据分割成多个文件,每个文件不超过10MB。数据被写入内存中的BytesIO对象,然后这些数据流被添加到ZIP压缩文件中。最后,压缩文件以特定的时间戳命名并保存到指定目录。
摘要由CSDN通过智能技术生成

代码如下:

import io
import os
import zipfile
from datetime import datetime
from pathlib import Path
from zoneinfo import ZoneInfo

import mysql.connector
from pytz import timezone


def get_connection():
    """Open a connection to the local MySQL server.

    Returns:
        A mysql.connector connection bound to the ``mydb`` schema on
        127.0.0.1:3306, authenticated as root with an empty password.
    """
    params = {
        'host': '127.0.0.1',
        'port': 3306,
        'user': 'root',
        'password': '',
        'database': 'mydb',
    }
    return mysql.connector.connect(**params)


def fetch_data(cursor, chunk_size):
    """Yield batches of rows from an already-executed DB-API cursor.

    Args:
        cursor: cursor with a pending result set (``execute`` already called).
        chunk_size: maximum number of rows per yielded batch.

    Yields:
        Non-empty row batches, until the result set is exhausted.
    """
    while True:
        batch = cursor.fetchmany(chunk_size)
        if not batch:
            return
        yield batch


def create_file_data(chunk_size):
    """Export rows from the ``test`` table into an in-memory ZIP archive.

    Rows are serialised as comma-joined UTF-8 lines and split across
    ``data_chunk_<n>.txt`` members so that each member stays at or
    under 10 MB.

    Args:
        chunk_size: number of rows fetched from the database per round trip.

    Returns:
        bytes: the complete ZIP archive contents.
    """
    connection = get_connection()
    cursor = connection.cursor()
    try:
        query = "SELECT * FROM test limit 1000000"
        cursor.execute(query)

        max_size = 10 * 1024 * 1024  # 10MB per archive member
        zip_data = io.BytesIO()
        with zipfile.ZipFile(zip_data, 'w', zipfile.ZIP_DEFLATED) as zipf:
            count = 0
            current_size = 0
            current_file_data = io.BytesIO()

            for rows in fetch_data(cursor, chunk_size):
                for row in rows:
                    # Encode once, newline included, so the size accounting
                    # matches the bytes actually written (the original
                    # omitted the '\n' and could overshoot max_size).
                    line = ",".join(str(cell) for cell in row).encode('utf-8') + b"\n"
                    # The 'current_size' guard avoids emitting an empty
                    # member when a single row alone exceeds max_size.
                    if current_size + len(line) > max_size and current_size:
                        zipf.writestr(f"data_chunk_{count}.txt", current_file_data.getvalue())
                        # Replace the buffer instead of truncate(0): the
                        # original left the stream position at EOF after
                        # read(), so the next chunk was NUL-padded up to
                        # the old offset.
                        current_file_data = io.BytesIO()
                        current_size = 0
                        count += 1

                    current_file_data.write(line)
                    current_size += len(line)

            # Flush the final (possibly partial) chunk. For an empty result
            # set this still writes an empty data_chunk_0.txt, matching the
            # original behaviour.
            zipf.writestr(f"data_chunk_{count}.txt", current_file_data.getvalue())

        return zip_data.getvalue()
    finally:
        # Close DB resources even if the export raises mid-way.
        cursor.close()
        connection.close()


def compress_files(output_dir, zip_filename):
    """Archive every direct child of *output_dir* into *zip_filename*.

    Args:
        output_dir (Path): directory whose entries are added to the archive,
            stored under their bare names (no directory prefix).
        zip_filename: path of the ZIP file to create (overwritten if present).
    """
    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as archive:
        for entry in output_dir.iterdir():
            archive.write(entry, arcname=entry.name)


def main():
    """Export the database and save the timestamped ZIP into ``output_data``."""
    chunk_size = 1000  # Adjust this value based on your data size and memory constraints
    zip_data = create_file_data(chunk_size)

    output_dir = Path("output_data")
    output_dir.mkdir(exist_ok=True)

    # zoneinfo (stdlib, 3.9+) replaces the pytz UTC->Shanghai round trip.
    now = datetime.now(ZoneInfo('Asia/Shanghai'))

    # Bug fix: the archive previously landed in the current working
    # directory while output_dir was created but never used.
    zip_path = output_dir / f"data_export_{now.strftime('%Y%m%d_%H%M%S')}.zip"
    zip_path.write_bytes(zip_data)


if __name__ == "__main__":
    main()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

GC-757

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值