第一步创建数据库连接
def connect():
try:
db = pymysql.connect(
host='localhost',
port=3306,
user='root',
passwd='mima',
charset='utf8'
)
return db
except Exception:
raise Exception("数据库连接失败")
第二步批量插入数据
def update_data_multi_rds(df, database_table, batch_num):
"""
通过dataframe 向 rds 中批量插入数据
:param df: pd.Dataframe类型
:param database_table: 插入的表名
:param batch_num: 插入每批数据的大小
:return:
"""
db = connect()
cur = db.cursor()
n = math.ceil(len(df) / batch_num)
for i in range(n):
dataframe = df.iloc[i * batch_num:(i + 1) * batch_num, :]
keys = dataframe.keys()
values = dataframe.values.tolist()
key_sql = ','.join(keys)
value_sql = ','.join(['%s'] * dataframe.shape[1])
insert_data_str = """ insert into %s (%s) values (%s) ON DUPLICATE KEY UPDATE""" % (
database_table, key_sql, value_sql)
update_str = ','.join([" {key} = VALUES({key})".format(key=key) for key in keys])
insert_data_str += update_str
try:
cur.executemany(insert_data_str, values)
db.commit()
except Exception as e:
print(e)
db.rollback()
cur.close()
db.close()
第三步测试
def select_talbe():
db = connect()
sql = "SELECT * FROM mall.`user`"
df = pd.read_sql(sql, db)
db.close()
update_data_multi_rds(df, "mall.user_1", 100)
select_talbe()
完整代码
import math
import pymysql
import pandas as pd
def connect():
try:
db = pymysql.connect(
host='localhost',
port=3306,
user='root',
passwd='admin',
charset='utf8'
)
return db
except Exception:
raise Exception("数据库连接失败")
def update_data_multi_rds(df, database_table, batch_num):
"""
通过dataframe 向 rds 中批量插入数据
:param df: pd.Dataframe类型
:param database_table: 插入的表名
:param batch_num: 插入每批数据的大小
:return:
"""
db = connect()
cur = db.cursor()
n = math.ceil(len(df) / batch_num)
for i in range(n):
dataframe = df.iloc[i * batch_num:(i + 1) * batch_num, :]
keys = dataframe.keys()
values = dataframe.values.tolist()
key_sql = ','.join(keys)
value_sql = ','.join(['%s'] * dataframe.shape[1])
insert_data_str = """ insert into %s (%s) values (%s) ON DUPLICATE KEY UPDATE""" % (
database_table, key_sql, value_sql)
update_str = ','.join([" {key} = VALUES({key})".format(key=key) for key in keys])
insert_data_str += update_str
try:
cur.executemany(insert_data_str, values)
db.commit()
except Exception as e:
print(e)
db.rollback()
cur.close()
db.close()
def select_talbe():
db = connect()
sql = "SELECT * FROM mall.`user`"
df = pd.read_sql(sql, db)
db.close()
update_data_multi_rds(df, "mall.user_1", 100)
if __name__ == '__main__':
select_talbe()