class Database:
    """Thin wrapper around a SQLAlchemy MySQL engine for one target table.

    The instance holds the connection settings, the engine built from them,
    and the schema/table names (``save_schema`` / ``save_table``) that the
    drop and write helpers operate on.
    """

    def __init__(self):
        """Build the connection URL and create the engine."""
        # Imported locally so the block does not depend on a file-level import.
        from urllib.parse import quote

        self.save_schema = 'dwd'
        self.save_table = 'dwd_xxx'
        # NOTE(review): credentials are hard-coded here; consider loading
        # them from the environment or a config file instead.
        self.mysql_dwd_config = {
            'drivername': 'mysql+pymysql',
            'username': 'user_a',
            'password': 'xxx@#$xxx',
            'host': 'am-xxxxx.ads.aliyuncs.com',
            'port': 3306,
        }

        # Feature-detect URL.create (SQLAlchemy 1.4+) instead of comparing
        # version strings, which compare lexicographically, not numerically.
        url_factory = getattr(sqlalchemy.engine.url.URL, 'create', None)
        if url_factory is not None:
            engine_url = url_factory(**self.mysql_dwd_config)
            self.mysql_engine_url = engine_url.update_query_dict(
                {'charset': 'utf8mb4'}
            )
        else:
            # A string URL must have its credentials percent-encoded so that
            # characters such as '@', '#', '%' or '/' are not read as URL
            # delimiters. quote(..., safe='') also encodes spaces as %20.
            escaped_config = dict(
                self.mysql_dwd_config,
                username=quote(self.mysql_dwd_config['username'], safe=''),
                password=quote(self.mysql_dwd_config['password'], safe=''),
            )
            self.mysql_engine_url = (
                '{drivername}://{username}:{password}@{host}:{port}/?charset=utf8mb4'
                .format(**escaped_config)
            )
        self.mysql_dwd_engine = sqlalchemy.create_engine(self.mysql_engine_url)

    @Usual.time_stat
    def drop_mysql(self):
        """Drop ``save_schema.save_table`` if it exists."""
        sql = f"DROP TABLE IF EXISTS {self.save_schema}.{self.save_table};"
        # begin() commits on success, so the statement is not left inside an
        # uncommitted transaction under SQLAlchemy 2.0 semantics.
        with self.mysql_dwd_engine.begin() as conn:
            conn.execute(sqlalchemy.text(sql))
        print('drop_table_done.')

    @Usual.time_stat
    def get_mysql_data(self, sql, chunksize=None):
        """Run ``sql`` against the engine and return the result as a DataFrame.

        Args:
            sql: Query passed to ``pd.read_sql``.
            chunksize: When truthy, rows are fetched in chunks of this size
                and concatenated; otherwise the result is read in one call.

        Returns:
            A single DataFrame holding all fetched rows.
        """
        if not chunksize:
            new_df = pd.read_sql(sql=sql, con=self.mysql_dwd_engine, chunksize=None)
        else:
            chunks = list(
                pd.read_sql(sql=sql, con=self.mysql_dwd_engine, chunksize=chunksize)
            )
            # One concat instead of repeated DataFrame.append (quadratic, and
            # removed in pandas 2.0). concat rejects an empty list, so fall
            # back to an empty frame when no chunk was produced.
            new_df = pd.concat(chunks, ignore_index=True) if chunks else pd.DataFrame()
        print(f'get_df shape: {new_df.shape}')
        return new_df

    @Usual.time_stat
    def write_mysql_data(self, df, if_exists='replace'):
        """Write ``df`` to ``save_schema.save_table`` via ``DataFrame.to_sql``.

        Args:
            df: DataFrame to write; its index is not written.
            if_exists: Forwarded to ``to_sql`` (defaults to ``'replace'``).
        """
        df.to_sql(
            con=self.mysql_dwd_engine,
            schema=self.save_schema,
            name=self.save_table,
            if_exists=if_exists,
            index=False,
            # NOTE(review): index_label only matters when index=True; kept
            # as in the original call.
            index_label='id',
            chunksize=10000,
        )

    @Usual.time_stat
    def save_csv_feature_to_db(self, csv='/tmp/xxx_feature.csv', chunksize=1000):
        """Append the rows of a CSV file to the target table chunk by chunk.

        Args:
            csv: Path of the CSV file to read.
            chunksize: Number of rows read (and written) per chunk.
        """
        saved_rows = 0
        with pd.read_csv(csv, iterator=True, chunksize=chunksize) as reader:
            for chunk in reader:
                self.write_mysql_data(chunk.round(4), if_exists='append')
                # Count actual rows so the final, possibly short, chunk is
                # not reported as a full one.
                saved_rows += len(chunk)
                logger.info('Saved to db: %s', saved_rows)
-- Create the schema `your_schema` with collation utf8mb4_unicode_ci.
CREATE SCHEMA your_schema COLLATE utf8mb4_unicode_ci;