业余时间学习下深度学习,个人理解就是对已有的数据进行非线性映射,然后在映射后的空间内找出数据的特征(个人理解可能不准,请指正)。这门课程很好,通俗易懂,有时间的可以学习下。https://mooc.study.163.com/smartSpec/detail/1001319001.htm
目前是预测收盘价,后面会继续更新,预测更多的数据。
源码地址:https://github.com/yu-bo/stock
前端显示地址:https://github.com/yu-bo/my_project
tushare地址:https://tushare.pro/register?reg=341048
然后想着找一些数据练习下,网上的教程例子很多,个人想着预测下股票信息,大概的过程如下:
1、使用tushare获取stock信息,
2、对数据进行处理,做好train_x和train_y的对应关系
3、训练和预测,网络学习的是x与y之间的映射关系,网络要与数据匹配
获取信息:
def getStockInfo(symbol, ts_code, start_time, end_time):
    """
    Fetch daily bars for one stock by paging backwards from ``end_time``.

    Each call to ``pro.daily`` is made with the current ``end_time``; the
    ``trade_date`` of the last row of the returned page then becomes the new
    ``end_time``. Paging stops when ``end_time`` reaches ``start_time``, when
    a page comes back empty, or when a page makes no progress (its last row
    is already at ``end_time``). The last two conditions prevent the
    IndexError / endless loop that occurs when ``start_time`` is never
    returned as a trade date.

    :param symbol: stock symbol; not used here, kept for interface compatibility
    :param ts_code: code passed to ``pro.daily`` as ``ts_code``
    :param start_time: start date passed to ``pro.daily`` as ``start_date``
    :param end_time: end date passed to ``pro.daily`` as ``end_date``
    :return: concatenated DataFrame with one row per ``trade_date``, or None
             when nothing was fetched (including ``start_time == end_time``)
    """
    df_list = []
    while start_time != end_time:
        df_t = pro.daily(ts_code=ts_code, start_date=start_time, end_date=end_time)
        if df_t is None or len(df_t) == 0:
            # Nothing (more) in the requested range.
            break
        df_list.append(df_t)
        # The last row of a page is treated as the oldest date fetched so far.
        oldest = df_t.tail(1)["trade_date"].values[0]
        if oldest == end_time:
            # No progress: asking again would return the same page forever.
            break
        end_time = oldest
    if not df_list:
        return None
    data = pd.concat(df_list, join="inner")
    # Consecutive pages share their boundary date; keep a single row per date.
    return data.drop_duplicates(subset="trade_date")
对数据处理:
首先读取数据,然后分离出训练数据和预测数据。输入数据是由连续几日的最高价、最低价等信息构成的二维数据,输出数据为下一日的收盘价;将输入与输出一一对应,形成训练数据进行训练。通常数据要进行标准化处理。
原始数据:
ts_code,trade_date,open,high,low,close,pre_close,change,pct_chg,vol,amount
000002.SZ,20200312,30.01,30.63,29.6,29.95,30.38,-0.43,-1.4154,593494.28,1776382.654
000002.SZ,20200311,30.83,31.0,30.36,30.38,30.8,-0.42,-1.3636,563318.6,1728508.5869999998
000002.SZ,20200310,30.6,31.57,30.2,30.8,30.55,0.25,0.8183,765993.64,2358611.141
000002.SZ,20200309,30.6,31.13,30.01,30.55,31.13,-0.58,-1.8632,843613.44,2579914.142
000002.SZ,20200306,31.81,31.99,30.99,31.13,32.3,-1.17,-3.6223,679528.96,2124469.847
000002.SZ,20200305,31.78,32.7,31.6,32.3,32.26,0.04,0.124,924170.06,2966839.995
000002.SZ,20200304,31.28,32.45,30.74,32.26,31.1,1.16,3.7299,1129796.24,3595379.313
000002.SZ,20200303,31.51,31.63,30.43,31.1,31.13,-0.03,-0.0964,942831.95,2913732.843000001
000002.SZ,20200302,29.9,31.63,29.9,31.13,29.59,1.54,5.2045,1375747.0,4275882.3489999985
000002.SZ,20200228,29.25,30.6,29.18,29.59,29.67,-0.08,-0.2696,1101299.76,3303591.4
000002.SZ,20200227,30.23,30.26,29.38,29.67,30.11,-0.44,-1.4613,922758.82,2745257.988
000002.SZ,20200226,28.85,30.96,28.6,30.11,29.33,0.78,2.6594,1654456.71,5002513.795
000002.SZ,20200225,28.82,29.65,28.8,29.33,28.9,0.43,1.4879,1052970.0,3080948.536000001
000002.SZ,20200224,29.1,29.15,28.42,28.9,29.25,-0.35,-1.1966,949995.58,2730214.174
def getTrainData(self, stockinfo: stockInfo) -> (np.array, np.array):
    """
    Build the training sequences for a single stock.

    Loads the daily frame for ``stockinfo.symbol``, splits it into inputs and
    targets with ``dataProcess`` and turns both into sequences. Normalisation
    is requested whenever ``self.verify`` is false. When
    ``self.only_untrain`` is set, only the samples from the index returned by
    ``parseUntrainedData`` onwards are kept.

    :param stockinfo: stock descriptor; only its ``symbol`` is read here
    :return: ``(train_x, train_y, scaler_x, scaler_y)`` -- four values, even
             though the annotation lists two -- or None when the frame has at
             most ``SEQUENCE_LEN`` rows or no untrained samples remain
    """
    code = stockinfo.symbol
    daily = stock_sql.getDailyFrame(code)
    # More than SEQUENCE_LEN rows are required.
    if not len(daily) > self.SEQUENCE_LEN:
        return None

    # Split the frame into input columns and target columns.
    features, targets = self.dataProcess(daily)
    seq_x, scaler_x = self.dataSequence(features, nor=not self.verify)
    seq_y, scaler_y = self.dataSequence_y(targets, nor=not self.verify)

    if not self.only_untrain:
        return seq_x, seq_y, scaler_x, scaler_y

    start = parseUntrainedData(code, daily)
    if start >= len(seq_x):
        # The first untrained sample lies past the end: nothing to return.
        return None
    return seq_x[start:], seq_y[start:], scaler_x, scaler_y
def dataProcess(self,df:pd.DataFrame)->(np.array,np.array):
    """
    Split a daily frame into aligned input and target data.

    The column names come from ``columnSplit(self.verify)``. The row labelled
    ``len(df) - 1`` is removed from the inputs and the row labelled ``0`` is
    removed from the targets, so that input row ``i`` lines up with the
    target taken from original row ``i + 1``.

    NOTE(review): the drops are label based, so this presumably expects a
    default 0..n-1 index -- confirm against ``getDailyFrame``.

    :param df: daily frame holding both the input and the target columns
    :return: ``(x, y)`` -- inputs without the last row, targets without the
             first row and re-indexed from 0
    """
    feature_cols, target_cols = columnSplit(self.verify)
    features = df[feature_cols]
    targets = df[target_cols]
    # The last input row has no following target.
    last_label = len(features) - 1
    features = features.drop(last_label, axis=0)
    # The first target row has no preceding input.
    targets = targets.drop(0, axis=0).reset_index(drop=True)
    return features, targets
def dataSequence(self,x:pd.DataFrame,nor:bool=True)->np.array:
    """
    Turn tabular data into overlapping windows of ``self.SEQUENCE_LEN`` rows.

    :param x: input data, converted with ``np.array``
    :param nor: when true the data is cast to float64 and scaled with a
                freshly fitted ``MinMaxScaler``; when false it is used as is
    :return: ``(windows, scaler)`` -- a float64 array with one entry per
             window start position, and the ``MinMaxScaler`` instance (left
             unfitted when ``nor`` is false)
    """
    scaler = MinMaxScaler()
    values = np.array(x)
    if nor:
        values = scaler.fit_transform(values.astype("float64"))

    width = self.SEQUENCE_LEN
    # One window per start position that still leaves `width` rows.
    windows = [
        values[start: start + width]
        for start in range(len(values) - width + 1)
    ]
    return np.array(windows).astype('float64'), scaler
建立模型:
我的模型比较简单,后面边学边改,目前也不太知道该怎么改。
def model_1(shape):
    """
    Build and compile the stacked-LSTM regression model.

    Three LSTM layers (500, 500 and 200 units; only the last one collapses
    the sequence) are followed by Dense layers of 200, 20 and 1 units. The
    model is compiled with Adam, MSE loss and MAE as metric, and its summary
    is printed.

    :param shape: input shape handed to ``keras.Input``
    :return: the compiled ``keras.Sequential`` model
    """
    # An optional leading Dense(100, tanh) layer was disabled in the original.
    stack = (
        keras.Input(shape=shape),
        keras.layers.LSTM(units=500, activation='tanh', return_sequences=True),
        keras.layers.LSTM(units=500, activation='tanh', return_sequences=True),
        keras.layers.LSTM(units=200, activation='tanh', return_sequences=False),
        keras.layers.Dense(units=200, activation="tanh"),
        keras.layers.Dense(units=20, activation="tanh"),
        keras.layers.Dense(units=1),
    )
    model = keras.Sequential()
    for layer in stack:
        model.add(layer)

    optimizer = keras.optimizers.Adam()
    model.compile(optimizer=optimizer, loss="mse", metrics=[keras.metrics.mae])
    model.summary()
    return model
结果预测:
预测与训练类似,需要构造待预测数据所对应的predict_x,其shape与train_x一致,然后送入训练好的模型进行预测;数据展示方面,用echarts将预测数据和真实数据展示出来就好。
预测代码
def getTestData(self, stockInfo:stockInfo):
    """
    Return the most recent samples of a stock for prediction.

    Takes the result of ``getTrainData`` and keeps the last
    ``self.RREDICT_LEN`` entries of the inputs and of the targets.

    :param stockInfo: stock descriptor, forwarded to ``getTrainData``
    :return: ``(x, y, scaler_x, scaler_y)``, or None when ``getTrainData``
             returned None or produced no samples
    """
    data = self.getTrainData(stockInfo)
    # The original test `len(data[0]) >= 0` was always true, so empty
    # training data was never rejected.
    if data is None or len(data[0]) == 0:
        return None
    train_x, train_y = data[0], data[1]
    tail = self.RREDICT_LEN
    return train_x[-tail:], train_y[-tail:], data[2], data[3]
def predict(self, stockinfo: prepare.stockInfo) -> (np.array, np.array):
    """
    Return the real and the predicted values for a stock.

    A stored record from ``stock_sql.getRecordData`` is returned directly
    when one is found. Otherwise the test data is fetched, run through
    ``self.model``, mapped back with ``scaler_y`` when one is supplied,
    rounded to two decimals, saved with ``stock_sql.SaveRecordData`` and
    returned.

    :param stockinfo: stock descriptor; its ``symbol`` selects the record
    :return: ``(real_y, predict_y)``; two empty arrays when no test data is
             available
    """
    cached = stock_sql.getRecordData(stockinfo.symbol)
    if cached:
        return cached[0], cached[1]

    sample = self.dataPre.getTestData(stockinfo)
    if sample is None:
        return np.array([]), np.array([])

    # Index 2 holds the input scaler, which is not needed here.
    inputs, actual, scaler_y = sample[0], sample[1], sample[3]
    forecast = self.model.predict(inputs)
    if scaler_y is not None:
        # Undo the scaling applied to the targets.
        actual = scaler_y.inverse_transform(actual)
        forecast = scaler_y.inverse_transform(forecast.astype("float64"))

    actual = np.around(actual, decimals=2)
    forecast = np.around(forecast, decimals=2)
    stock_sql.SaveRecordData(stockinfo, actual, forecast)
    return actual, forecast
下面是一个预测的结果,不是很准确(序列变长、网络增大能改善不少),只能看个大概趋势,有严重的滞后,希望有懂的大神给点指导意见。