相关包:clickhouse_driver、pandas、sqlalchemy
1、链接并从clickhouse中获取相关数据
client = Client(host='你的host', port=9068, user='你的user', password='你的pwd', database='你的数据库名', send_receive_timeout=5) # kpi_sql 为sql语句 def kpi_sql(s_time,e_time): sql=f""" select * from db.table_name where `SDATE`>=parseDateTimeBestEffort(toString({s_time})) and `SDATE`<= parseDateTimeBestEffort(toString({e_time})) """ return sql kpidata = client.execute(kpi_sql(s_time,e_time)) # column_name 为对应的列名,即sql查询结果列,传入list格式:column_name=["a","b"] kpidata = pd.DataFrame(kpidata, columns=column_name)
2、存入mysql
# 通过sqlalchemy建立于mysql链接,通过pandas的to_sql传入对应表
pm_data = create_engine(f'mysql+pymysql://{user}:{pwd}@host:port/databse?charset=utf8')
kpidata.to_sql(name=table_name, con=pm_data, if_exists="append", index=False, index_label=False)
import pandas as pd
from loguru import logger
from clickhouse_driver import Client
from sqlalchemy import create_engine
import datetime
from datetime import timedelta
from urllib import parse
import numpy as np
def get_noise_five(s_time,e_time):
sql=f""" sql """
return sql
def get_noise_four(s_time,e_time):
sql = f""" sql """
return sql
def get_lastest_time(table):
sql = f"""sql"""
return sql
def cktomysql(a,s_time,e_time,column_name,table_name):
if a=="4g":
kpi_sql = get_noise_four(s_time=s_time,e_time=e_time)
else :
kpi_sql = get_noise_five(s_time=s_time,e_time=e_time)
kpidata = client.execute(kpi_sql)
kpidata = pd.DataFrame(kpidata, columns=column_name)
kpidata.to_sql(name=table_name, con=pm_data, if_exists="append", index=False, index_label=False)
if __name__ == '__main__':
# 连接数据库
password = "pwd"
pwd = parse.quote_plus(password)
pm_data = create_engine(f'mysql+pymysql://user:{pwd}@host:port/db?charset=utf8')
client = Client(host='host', port=port, user='user', password='pwd', database='db', send_receive_timeout=5)
# 设置查询数据的时间及表格名称
days = datetime.datetime.now().strftime("%Y%m%d")
four_table_name = "example" + str(days)
five_table_name = "example" + str(days)
stime = (datetime.datetime.now()-timedelta(hours=1)).strftime("%Y%m%d%H0000")
etime = datetime.datetime.now().strftime("%Y%m%d%H%M00")
logger.info(f"获取的时间段为{stime}至{etime}")
five_columns = []
four_columns = []
get_list = [{"net_type":"4g","columns":four_columns,"table":four_table_name},{"net_type":"5g","columns":five_columns,"table":five_table_name}]
for dict1 in get_list:
cktomysql(a=dict1["net_type"],s_time = stime,e_time = etime,column_name=dict1["columns"],table_name=dict1["table"])