clickhouse金融数据环境搭建
一、clickhouse建price表
按年份建立每5min的price表。建表语句如下:
CREATE TABLE stock_db.bs_5mins_price_2020
(
`code` String,
`open` Float64,
`high` Float64,
`low` Float64,
`close` Float64,
`volume` Int64,
`amount` Float64,
`date` Date,
`time` String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(date)
ORDER BY (code, date, time)
SETTINGS index_granularity = 8192;
二、股票基本信息存储
1.交易日信息存储
def save_trade_days_file():
pro = ts.pro_api()
df = pro.trade_cal(exchange='SSE',
start_date='19900101',
end_date='20221231',
is_open='1')
np_cal_date = np.array(df['cal_date']).reshape(-1)
for i in range(np_cal_date.shape[0]):
np_cal_date[i] = np_cal_date[i][0:4] +
'-' +
np_cal_date[i][4:6] +
'-' +
np_cal_date[i][6:8]
cal_date_pd = pd.DataFrame(np_cal_date)
cal_date_pd.rename(columns={0: 'date'}, inplace=True)
df = df.join(cal_date_pd, how='inner')
df.rename(columns={'cal_date': 'cal_date_code', 'pretrade_date': 'pretrade_date_code'}, inplace=True)
df.to_csv('..\\basicData\\trade_days.csv')
2.股票基本信息存储
def save_basic_data_file():
pro = ts.pro_api()
# 查询当前所有正常上市交易的股票列表
ts_data = pro.stock_basic(exchange='', list_status='L',
fields='ts_code,symbol,name,area,industry,fullname,enname,cnspell,market,exchange,curr_type,list_status,list_date')
ts_code_array = np.array(ts_data['ts_code'])
bs_code_array = ts_code_array.reshape(-1)
for i in range(ts_code_array.shape[0]):
if str(ts_code_array[i]).__contains__('.SZ'):
bs_code_array[i] = str(ts_code_array[i]).replace('.SZ', '')
bs_code_array[i] = 'sz.' + bs_code_array[i]
if str(ts_code_array[i]).__contains__('.SH'):
bs_code_array[i] = str(ts_code_array[i]).replace('.SH', '')
bs_code_array[i] = 'sh.' + bs_code_array[i]
if str(ts_code_array[i]).__contains__('.BJ'):
bs_code_array[i] = str(ts_code_array[i]).replace('.BJ', '')
bs_code_array[i] = 'bj.' + bs_code_array[i]
bs_code_data = pd.DataFrame(bs_code_array)
bs_code_data.rename(columns={0: 'bs_code'}, inplace=True)
basic_data = bs_code_data.join(ts_data, how='inner')
nowtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
nowtime_array = bs_code_array
for i in range(nowtime_array.shape[0]):
nowtime_array[i] = nowtime
nowtime_pd = pd.DataFrame(nowtime_array)
nowtime_pd.rename(columns={0: 'edittime'}, inplace=True)
basic_data = basic_data.join(nowtime_pd, how='inner')
basic_data .to_csv('..\\basicData\\basic_data.csv')
3.bs_code(baostock格式的code)存储
wait
4.price存储
i = 0;
lg = bs.login()
bs_code_array = get_bs_code_array().reshape(-1)
startDate = '2019-01-01'
endDate = '2019-12-31'
tableName = 'bs_5mins_price_2019'
for bs_code_ith in bs_code_array:
# if i<1534:
# i = i+1
# continue
rs = bs.query_history_k_data_plus(bs_code_ith,
"date,time,code,open,high,low,close,volume,amount",
start_date=startDate, end_date=endDate,
frequency="5", adjustflag="1")
#### 打印结果集 ####
data_list = []
while (rs.error_code == '0') & rs.next():
# 获取一条记录,将记录合并在一起
data_list.append(rs.get_row_data())
result = pd.DataFrame(data_list, columns=rs.fields)
print('bs_time', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
totalNum = saveDataToClickhouse(result, tableName)
print(i, bs_code_ith, 'totalNum = ', totalNum, 'start_date =', startDate, 'end_date = ', endDate)
print('save_time', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
i = i + 1
三、信息读取
1.bs_code读取
def get_bs_code_array():
df_date_read = pd.read_csv('..\\basicData\\bs_code.csv', index_col=0, header=0)
return np.array(df_date_read)
2.price读取
def queryFromClickhouse():
connection_url = 'http://default:!Sr19940703@192.168.0.122:8123'
query = 'select * from stock_db.bs_5mins_price_2022'
df_ = ch2pd.select(connection_url, query)
return df_