代码如下:
import os
import io
import mysql.connector
from pytz import timezone
from datetime import datetime
from pathlib import Path
import zipfile
def get_connection(host='127.0.0.1', user='root', password='', database='mydb', port=3306):
    """Open and return a new MySQL connection.

    Every argument is forwarded unchanged to ``mysql.connector.connect``.
    Calling the function with no arguments connects with the same settings
    that used to be hard-coded here.

    Args:
        host: Server host name or IP address.
        user: Account name used to log in.
        password: Password for ``user``.
        database: Schema selected after connecting.
        port: TCP port of the server.

    Returns:
        The connection object returned by ``mysql.connector.connect``.
        The caller is responsible for closing it.
    """
    # NOTE(review): the defaults (root with an empty password) look like
    # local development placeholders -- pass real credentials explicitly
    # when running against a shared server.
    return mysql.connector.connect(
        host=host,
        user=user,
        password=password,
        database=database,
        port=port,
    )
def fetch_data(cursor, chunk_size):
    """Yield the cursor's pending result set in batches.

    Args:
        cursor: Object exposing ``fetchmany(size)``; a query must already
            have been executed on it.
        chunk_size: Number of rows requested per ``fetchmany`` call.

    Yields:
        Each non-empty batch exactly as returned by ``cursor.fetchmany``.
        Iteration stops at the first empty (falsy) batch.
    """
    rows = cursor.fetchmany(chunk_size)
    while rows:
        yield rows
        rows = cursor.fetchmany(chunk_size)
def create_file_data(chunk_size, max_size=10 * 1024 * 1024):
    """Export the ``test`` table into an in-memory ZIP archive.

    Rows are read in batches through ``fetch_data``. Each row becomes one
    line: the ``str()`` of every cell joined with commas, UTF-8 encoded and
    terminated by a newline. Lines are collected into entries named
    ``data_chunk_<n>.txt``; a new entry is started whenever appending the
    next line would push the current one past ``max_size`` bytes.

    Args:
        chunk_size: Number of rows requested from the cursor per batch.
        max_size: Upper bound, in bytes, for the uncompressed size of one
            entry (10 MB by default). A single line longer than this is
            still written, alone, in its own entry.

    Returns:
        The complete ZIP archive as ``bytes``. An empty result set yields
        an archive holding one empty ``data_chunk_0.txt`` entry.
    """
    zip_data = io.BytesIO()
    connection = get_connection()
    try:
        cursor = connection.cursor()
        try:
            cursor.execute("SELECT * FROM test limit 1000000")
            with zipfile.ZipFile(zip_data, 'w', zipfile.ZIP_DEFLATED) as zipf:
                count = 0
                current_size = 0
                current_file_data = io.BytesIO()
                for rows in fetch_data(cursor, chunk_size):
                    for row in rows:
                        # Encode once and include the newline, so the size
                        # check counts the bytes that are actually written.
                        line = ",".join(str(cell) for cell in row).encode('utf-8') + b"\n"
                        line_size = len(line)
                        # Never flush an empty buffer: an oversized first
                        # row must not produce an empty entry.
                        if current_size and current_size + line_size > max_size:
                            zipf.writestr(f"data_chunk_{count}.txt", current_file_data.getvalue())
                            # Start from a fresh buffer. truncate(0) keeps
                            # the old stream position, so the next write
                            # would be preceded by NUL padding.
                            current_file_data = io.BytesIO()
                            current_size = 0
                            count += 1
                        current_file_data.write(line)
                        current_size += line_size
                # Whatever is left over becomes the final entry.
                zipf.writestr(f"data_chunk_{count}.txt", current_file_data.getvalue())
        finally:
            cursor.close()
    finally:
        # Runs even if the query, the export or cursor.close() raised.
        connection.close()
    return zip_data.getvalue()
def compress_files(output_dir, zip_filename):
    """Pack the regular files directly inside ``output_dir`` into a ZIP.

    Entries are stored under their bare file names, deflate-compressed and
    added in sorted order so repeated runs produce the same layout.
    Sub-directories are skipped; their contents were never included.

    Args:
        output_dir: Directory to read, as a ``str`` or ``Path``.
        zip_filename: Destination passed to ``zipfile.ZipFile``: a path or
            a writable binary file object.
    """
    source_dir = Path(output_dir)
    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
        # The archive file exists by now, so its path resolves reliably.
        # It may live inside source_dir and must not be added to itself.
        if isinstance(zip_filename, (str, os.PathLike)):
            archive_path = Path(zip_filename).resolve()
        else:
            archive_path = None
        for file in sorted(source_dir.iterdir()):
            if not file.is_file():
                continue
            if archive_path is not None and file.resolve() == archive_path:
                continue
            zipf.write(file, file.name)
def main():
    """Export the table and save the archive in the working directory.

    The archive is built in memory by ``create_file_data`` and written to
    ``./data_export_<YYYYmmdd_HHMMSS>.zip``, where the timestamp is the
    current time in the Asia/Shanghai zone.
    """
    # Rows fetched per round trip; tune to the row width and available memory.
    chunk_size = 1000
    zip_data = create_file_data(chunk_size)

    # NOTE(review): this directory is created but nothing is written into
    # it; the archive below goes to the current directory. Kept so the
    # script's side effects stay the same -- confirm whether it is needed.
    output_dir = Path("output_data")
    output_dir.mkdir(exist_ok=True)

    # Asking for "now" directly in the target zone gives the same value as
    # taking UTC first and converting afterwards.
    now = datetime.now(timezone('Asia/Shanghai'))
    zip_filename = f"./data_export_{now.strftime('%Y%m%d_%H%M%S')}.zip"
    with open(zip_filename, "wb") as f:
        f.write(zip_data)
# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
该代码示例展示了一个Python脚本:它连接到MySQL数据库,执行查询并分批读取结果,然后把数据切分成多个文本分片,每个分片不超过约10MB。各分片先写入内存中的BytesIO缓冲区,再作为独立条目添加到ZIP压缩包中。最后,压缩包以带时间戳(Asia/Shanghai时区)的文件名保存到当前工作目录。

被折叠的 条评论
为什么被折叠?



