显然,股票的OHCL(Open、High、Close、Low,即开盘价、最高价、收盘价、最低价)是高度相关的数据,直接喂给神经网络时能提供的信息较少。本文介绍两种将OHCLV(OHCL加上成交量Volume)转化为金融技术指标的办法,以降低特征之间的相关性,更好地描述动态、复杂、非线性且充满噪音的股票市场。
首先,本文调用tushare库提取股票历史数据(以下代码假定已执行 import tushare as ts 和 import pandas as pd)。
pip install tushare
# Obtain the tushare pro API client.
def get_token(token="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"):
    """Register a tushare token and return the pro API client.

    Args:
        token: tushare API token. The default is the article's placeholder
            value; pass a real token instead of editing the function body.

    Returns:
        The object returned by ``ts.pro_api()``.
    """
    ts.set_token(token)
    return ts.pro_api()
# Fetch the list of stock codes.
def get_data_list(cursor, sql, conn):
    """Run *sql* and return each result row as one comma-joined string.

    Args:
        cursor: Cursor object providing ``execute`` and ``fetchall``.
        sql: Query text. Every selected column must hold strings, because
            the values of each row are passed to ``str.join``.
        conn: Connection object; ``commit()`` is called on it after the
            rows have been fetched.

    Returns:
        A list with one string per result row, the row's column values
        joined by ``","``. Empty when the query returns no rows.
    """
    cursor.execute(sql)
    rows = cursor.fetchall()
    conn.commit()
    return [",".join(row) for row in rows]
# Fetch the daily quotes.
def get_data(ts_codes_list, pro):
    """Download daily quotes for the given codes in batches of 100.

    Args:
        ts_codes_list: List of stock-code strings.
        pro: API client exposing ``daily(ts_code=..., trade_date=...)``.

    Returns:
        A DataFrame with the columns ``ts_code``, ``trade_date``, ``open``,
        ``close``, ``high``, ``low`` and ``volume``; ``trade_date`` has been
        passed through ``get_date_format``.
    """
    columns = ["ts_code", "trade_date", "open", "close", "high", "low", "volume"]
    batch_size = 100
    # Evaluate the trade date once so every batch queries the same day.
    trade_date = get_date()

    parts = []
    for start in range(0, len(ts_codes_list), batch_size):
        # Slicing clamps at the end of the list, so no manual bound check.
        name = ",".join(ts_codes_list[start:start + batch_size])
        part = pro.daily(ts_code=name, trade_date=trade_date)
        # tushare's daily interface names the volume column `vol`; rename it
        # so the column selection below does not raise KeyError. The rename
        # is a no-op when a `volume` column is already present.
        parts.append(part.rename(columns={"vol": "volume"})[columns])

    # Concatenate once instead of growing the frame inside the loop.
    if parts:
        daily = pd.concat(parts, ignore_index=True)
    else:
        daily = pd.DataFrame(columns=columns)

    daily["trade_date"] = daily["trade_date"].apply(get_date_format)
    return daily
接下来介绍第一种办法:使用TA库(PyPI包名为ta)计算技术指标,其中add_all_ta_features需通过 from ta import add_all_ta_features 导入。
pip install TA
# Compute the ta feature set on `df`, telling the library which columns hold
# the open/high/low/close/volume values; fillna=True is forwarded as-is.
data = add_all_ta_features(
    df,
    open="open",
    high="high",
    low="low",
    close="close",
    volume="volume",
    fillna=True,
)
print("Added TA Features...")
# Replace the existing index with a fresh one, discarding the old index.
data = data.reset_index(drop=True)
# Bare expression: shows the column index when run in a notebook/REPL.
data.columns
Index(['date', 'open', 'high', 'low', 'close', 'volume', 'tic', 'day',
'volume_adi', 'volume_obv', 'volume_cmf', 'volume_fi', 'volume_mfi',
'volume_em', 'volume_sma_em', 'volume_vpt', 'volume_nvi', 'volume_vwap',
'volatility_atr', 'volatility_bbm', 'volatility_bbh', 'volatility_bbl',
'volatility_bbw', 'volatility_bbp', 'volatility_bbhi',
'volatility_bbli', 'volatility_kcc', 'volatility_kch', 'volatility_kcl',
'volatility_kcw', 'volatility_kcp', 'volatility_kchi',
'volatility_kcli', 'volatility_dcl', 'volatility_dch', 'volatility_dcm',
'volatility_dcw', 'volatility_dcp', 'volatility_ui', 'trend_macd',
'trend_macd_signal', 'trend_macd_diff', 'trend_sma_fast',
'trend_sma_slow', 'trend_ema_fast', 'trend_ema_slow', 'trend_adx',
'trend_adx_pos', 'trend_adx_neg', 'trend_vortex_ind_pos',
'trend_vortex_ind_neg', 'trend_vortex_ind_diff', 'trend_trix',
'trend_mass_index', 'trend_cci', 'trend_dpo', 'trend_kst',
'trend_kst_sig', 'trend_kst_diff', 'trend_ichimoku_conv',
'trend_ichimoku_base', 'trend_ichimoku_a', 'trend_ichimoku_b',
'trend_visual_ichimoku_a', 'trend_visual_ichimoku_b', 'trend_aroon_up',
'trend_aroon_down', 'trend_aroon_ind', 'trend_psar_up',
'trend_psar_down', 'trend_psar_up_indicator',
'trend_psar_down_indicator', 'trend_stc', 'momentum_rsi',
'momentum_stoch_rsi', 'momentum_stoch_rsi_k', 'momentum_stoch_rsi_d',
'momentum_tsi', 'momentum_uo', 'momentum_stoch',
'momentum_stoch_signal', 'momentum_wr', 'momentum_ao', 'momentum_kama',
'momentum_roc', 'momentum_ppo', 'momentum_ppo_signal',
'momentum_ppo_hist', 'others_dr', 'others_dlr', 'others_cr'],
dtype='object')
第二种办法是使用stockstats库对OHCLV数据进行转换。
from stockstats import StockDataFrame as Sdf
# NOTE(review): `self.tech_indicator_list` suggests this fragment is the body
# of a method whose `def` line is not shown here — confirm before reuse.

# Work on a copy sorted by ticker, then date.
df = data.copy()
df = df.sort_values(by=['tic', 'date'])
stock = Sdf.retype(df.copy())
unique_ticker = stock.tic.unique()

for indicator in self.tech_indicator_list:
    # Collect one frame per ticker and concatenate once; DataFrame.append
    # was removed in pandas 2.0.
    per_ticker_frames = []
    for ticker in unique_ticker:
        try:
            temp_indicator = stock[stock.tic == ticker][indicator]
            temp_indicator = pd.DataFrame(temp_indicator)
            temp_indicator['tic'] = ticker
            # Dates are taken from `df` using the same ticker filter.
            temp_indicator['date'] = df[df.tic == ticker]['date'].to_list()
            per_ticker_frames.append(temp_indicator)
        except Exception as e:
            # Best effort: report the failure and skip this ticker.
            print(e)
    if per_ticker_frames:
        indicator_df = pd.concat(per_ticker_frames, ignore_index=True)
    else:
        # Same starting value the original loop used when nothing was added.
        indicator_df = pd.DataFrame()
    # Attach the indicator column to the main frame by ticker and date.
    df = df.merge(indicator_df[['tic', 'date', indicator]], on=['tic', 'date'], how='left')

# Final ordering: by date, then ticker.
df = df.sort_values(by=['date', 'tic'])