保存csv到mysql的通用脚本

import os
import pandas as pd
import numpy as np
import mysql.connector

def connect_to_db(host, user, password, database):
    return mysql.connector.connect(
        host=host,
        user=user,
        password=password,
        database=database,
        connection_timeout=600,
        buffered=True
    )

def create_table(cursor, table_name, columns, charset, column_data_types=None):
    if column_data_types is None:
        column_data_types = {}
    columns_sql = ', '.join(
        [f"`{col}` {column_data_types.get(col, 'VARCHAR(100)')}" for col in columns]
    )
    create_table_sql = f"CREATE TABLE `{table_name}` ({columns_sql}) CHARACTER SET {charset}"
    cursor.execute(f"DROP TABLE IF EXISTS `{table_name}`")  # Ensure table does not exist before creating it
    cursor.execute(create_table_sql)

def insert_data(cursor, table_name, columns, data):
    placeholders = ', '.join(['%s'] * len(columns))
    columns_sql = ', '.join([f"`{col}`" for col in columns])
    insert_sql = f"INSERT INTO `{table_name}` ({columns_sql}) VALUES ({placeholders})"
    cursor.executemany(insert_sql, data)

def read_csv_with_fallback(file_path, chunk_size=50000):
    try:
        print(f"Pandas first Reading {file_path} with UTF-8 encoding.")
        return pd.read_csv(file_path, chunksize=chunk_size, encoding='utf-8', low_memory=False), 'utf-8'
    except UnicodeDecodeError:
        print(f"Failed to read {file_path} with UTF-8 encoding. Trying GBK encoding.")
        return pd.read_csv(file_path, chunksize=chunk_size, encoding='gbk', low_memory=False), 'gbk'

def save_all(file_path, cursor, temp_table_name, column_data_types, chunk_size):
    first_chunk, encoding = read_csv_with_fallback(file_path, chunk_size)
    charset = 'utf8mb4' if encoding == 'utf-8' else 'gbk'
    columns = None
    retry_columns = column_data_types if column_data_types else {}

    for i, df in enumerate(first_chunk):
        if i == 0:
            columns = df.columns.tolist()
            create_table(cursor, temp_table_name, columns, charset, retry_columns)

        data = df.replace({np.nan: None}).values.tolist()  # Replace NaN with None

        try:
            insert_data(cursor, temp_table_name, columns, data)
        except mysql.connector.Error as e:
            if e.errno == mysql.connector.errorcode.ER_DATA_TOO_LONG:
                print(f"{temp_table_name}: Data too long error: {e}")
                # Extract the column name that caused the error
                error_msg = str(e)
                col_name_start = error_msg.find("'") + 1
                col_name_end = error_msg.find("'", col_name_start)
                col_name = error_msg[col_name_start:col_name_end]

                print(f"Column `{col_name}` data is too long. Recreating table with `{col_name}` as larger data type.")

                # Update the column data type
                if col_name in retry_columns:
                    if retry_columns[col_name] == 'TEXT':
                        retry_columns[col_name] = 'MEDIUMTEXT'
                    else:
                        print(f"Column `{col_name}` already set to MEDIUMTEXT and still too large.")
                        raise
                else:
                    retry_columns[col_name] = 'TEXT'

                # Drop the temp table if it exists
                cursor.execute(f"DROP TABLE IF EXISTS `{temp_table_name}`")

                # Recreate the table with updated column data types
                print('重新从头开始插入数据...')
                return (False, retry_columns)
            else:
                print(f"Failed to insert data into table `{temp_table_name}`. Error: {e}")
                raise
    return (True, retry_columns)  # Signal successful insertion

def process_csv_file(file_path, db_connection, chunk_size=50000):
    original_table_name = os.path.splitext(os.path.basename(file_path))[0]
    temp_table_name = f"{original_table_name}_temp"
    
    cursor = db_connection.cursor()
    try:
        db_connection.start_transaction()
        
        success, column_data_types = save_all(file_path, cursor, temp_table_name, None, chunk_size)
        
        while not success:
            success, column_data_types = save_all(file_path, cursor, temp_table_name, column_data_types, chunk_size)
        
        db_connection.commit()
        cursor.execute(f"RENAME TABLE `{temp_table_name}` TO `{original_table_name}`")
        print(f"Table `{original_table_name}` created and data inserted successfully.")
    except Exception as e:
        db_connection.rollback()
        print(f"Transaction rolled back due to error: {e}")
        cursor.execute(f"DROP TABLE IF EXISTS `{temp_table_name}`")
    finally:
        cursor.close()

def process_csv_files(directory, db_connection, chunk_size=50000):
    for filename in os.listdir(directory):
        if filename.endswith('.csv'):
            file_path = os.path.join(directory, filename)
            try:
                process_csv_file(file_path, db_connection, chunk_size)
            except Exception as e:
                print(f"Error processing file `{filename}`: {e}")

if __name__ == "__main__":
    db_config = {
        'host': '127.0.0.1',
        'user': 'root',
        'password': 'password',
        'database': 'db'
    }

    csv_directory = r'./data/data.csv'

    db_connection = connect_to_db(**db_config)

    try:
        process_csv_files(csv_directory, db_connection)
    finally:
        db_connection.close()
  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
智慧校园整体解决方案是响应国家教育信息化政策,结合教育改革和技术创新的产物。该方案以物联网、大数据、人工智能和移动互联技术为基础,旨在打造一个安全、高效、互动且环保的教育环境。方案强调从数字化校园向智慧校园的转变,通过自动数据采集、智能分析和按需服务,实现校园业务的智能化管理。 方案的总体设计原则包括应用至上、分层设计和互联互通,确保系统能够满足不同用户角色的需求,并实现数据和资源的整合与共享。框架设计涵盖了校园安全、管理、教学、环境等多个方面,构建了一个全面的校园应用生态系统。这包括智慧安全系统、校园身份识别、智能排课及选课系统、智慧学习系统、精品录播教室方案等,以支持个性化学习和教学评估。 建设内容突出了智慧安全和智慧管理的重要性。智慧安全管理通过分布式录播系统和紧急预案一键启动功能,增强校园安全预警和事件响应能力。智慧管理系统则利用物联网技术,实现人员和设备的智能管理,提高校园运营效率。 智慧教学部分,方案提供了智慧学习系统和精品录播教室方案,支持专业级学习硬件和智能化网络管理,促进个性化学习和教学资源的高效利用。同时,教学质量评估中心和资源应用平台的建设,旨在提升教学评估的科学性和教育资源的共享性。 智慧环境建设则侧重于基于物联网的设备管理,通过智慧教室管理系统实现教室环境的智能控制和能效管理,打造绿色、节能的校园环境。电子班牌和校园信息发布系统的建设,将作为智慧校园的核心和入口,提供教务、一卡通、图书馆等系统的集成信息。 总体而言,智慧校园整体解决方案通过集成先进技术,不仅提升了校园的信息化水平,而且优化了教学和管理流程,为学生、教师和家长提供了更加便捷、个性化的教育体验。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值