clickhouse金融数据环境搭建

一、clickhouse建price表

按年份建立每5min的price表。建表语句如下:

CREATE TABLE stock_db.bs_5mins_price_2020
(
    `code` String,
    `open` Float64,
    `high` Float64,
    `low` Float64,
    `close` Float64,
    `volume` Int64,
    `amount` Float64,
    `date` Date,
    `time` String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(date)
ORDER BY (code, date, time)
SETTINGS index_granularity = 8192;

二、股票基本信息存储

1.交易日信息存储

def save_trade_days_file():
    pro = ts.pro_api()
    df = pro.trade_cal(exchange='SSE', 
    start_date='19900101', 
    end_date='20221231', 
    is_open='1')
    np_cal_date = np.array(df['cal_date']).reshape(-1)
    for i in range(np_cal_date.shape[0]):
        np_cal_date[i] = np_cal_date[i][0:4] + 
        '-' + 
        np_cal_date[i][4:6] + 
        '-' + 
        np_cal_date[i][6:8]
    cal_date_pd = pd.DataFrame(np_cal_date)
    cal_date_pd.rename(columns={0: 'date'}, inplace=True)
    df = df.join(cal_date_pd, how='inner')
    df.rename(columns={'cal_date': 'cal_date_code', 'pretrade_date': 'pretrade_date_code'}, inplace=True)
    df.to_csv('..\\basicData\\trade_days.csv')

2.股票基本信息存储

def save_basic_data_file():
    pro = ts.pro_api()

    # 查询当前所有正常上市交易的股票列表
    ts_data = pro.stock_basic(exchange='', list_status='L',
                              fields='ts_code,symbol,name,area,industry,fullname,enname,cnspell,market,exchange,curr_type,list_status,list_date')
    ts_code_array = np.array(ts_data['ts_code'])
    bs_code_array = ts_code_array.reshape(-1)
    for i in range(ts_code_array.shape[0]):
        if str(ts_code_array[i]).__contains__('.SZ'):
            bs_code_array[i] = str(ts_code_array[i]).replace('.SZ', '')
            bs_code_array[i] = 'sz.' + bs_code_array[i]
        if str(ts_code_array[i]).__contains__('.SH'):
            bs_code_array[i] = str(ts_code_array[i]).replace('.SH', '')
            bs_code_array[i] = 'sh.' + bs_code_array[i]
        if str(ts_code_array[i]).__contains__('.BJ'):
            bs_code_array[i] = str(ts_code_array[i]).replace('.BJ', '')
            bs_code_array[i] = 'bj.' + bs_code_array[i]
    bs_code_data = pd.DataFrame(bs_code_array)
    bs_code_data.rename(columns={0: 'bs_code'}, inplace=True)
    basic_data = bs_code_data.join(ts_data, how='inner')

    nowtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    nowtime_array = bs_code_array
    for i in range(nowtime_array.shape[0]):
        nowtime_array[i] = nowtime
    nowtime_pd = pd.DataFrame(nowtime_array)
    nowtime_pd.rename(columns={0: 'edittime'}, inplace=True)
    basic_data = basic_data.join(nowtime_pd, how='inner')
    basic_data .to_csv('..\\basicData\\basic_data.csv')

3.bs_code(baostock格式的code)存储

wait

4.price存储

i = 0;
lg = bs.login()
bs_code_array = get_bs_code_array().reshape(-1)
startDate = '2019-01-01'
endDate = '2019-12-31'
tableName = 'bs_5mins_price_2019'
for bs_code_ith in bs_code_array:
    # if i<1534:
    #     i = i+1
    #     continue
    rs = bs.query_history_k_data_plus(bs_code_ith,
                                      "date,time,code,open,high,low,close,volume,amount",
                                      start_date=startDate, end_date=endDate,
                                      frequency="5", adjustflag="1")

    #### 打印结果集 ####
    data_list = []
    while (rs.error_code == '0') & rs.next():
        # 获取一条记录,将记录合并在一起
        data_list.append(rs.get_row_data())
    result = pd.DataFrame(data_list, columns=rs.fields)
    print('bs_time', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    totalNum = saveDataToClickhouse(result, tableName)
    print(i, bs_code_ith, 'totalNum = ', totalNum, 'start_date =', startDate, 'end_date = ', endDate)
    print('save_time', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    i = i + 1

三、信息读取

1.bs_code读取

def get_bs_code_array():
    df_date_read = pd.read_csv('..\\basicData\\bs_code.csv', index_col=0, header=0)
    return np.array(df_date_read)

2.price读取

def queryFromClickhouse():
    connection_url = 'http://default:!Sr19940703@192.168.0.122:8123'
    query = 'select * from stock_db.bs_5mins_price_2022'
    df_ = ch2pd.select(connection_url, query)
    return df_
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值