首先,介绍tushare,这是一个非常友好的数据源
网址是:https://tushare.pro
数据丰富
拥有丰富的数据内容,如股票、基金、期货、数字货币等行情数据,公司财务、基金经理等基本面数据
获取简单
SDK开发包支持多种语言,同时提供HTTP Restful接口,最大程度方便不同人群的使用
落地方便
提供多种数据储存方式,如Oracle、MySQL、MongoDB、HDF5、CSV等,为数据获取提供了性能保证
包含以下几大类的数据
悠远的发展历史
强大的合作伙伴
丰富的种类
项目设计
data:数据爬取下来的存放路径
login:各种key存储的地方
model:模型存放的地方
picture:画图存放的地方
spider:爬取数据的地方
ts.py
这一块没什么好说的,tushare官网上基本都有使用方法的介绍
网址:https://tushare.pro/document/2?doc_id=27
# from login.mysql_login import engine
# CONN = engine.connect()
from login.login_sdk import pro
import pandas as pd
if __name__ == '__main__':
    # Query parameters: one ticker over a fixed date range.
    ts_code, start_date, end_date = '000001.SZ', '20120101', '20191231'

    # Fetch the rows through the project's `pro` client.
    data = pro.daily(
        ts_code=ts_code,
        start_date=start_date,
        end_date=end_date,
    )

    # Persist the result as CSV without the index column.
    # NOTE(review): the model script in this write-up reads './data/data.csv'
    # and expects the columns vol, turnover_rate, net_mf_vol and close --
    # confirm that this path and the columns returned by pro.daily match.
    data.to_csv('../data/data.csv', index=False)
login:这里涉及到证书问题,所以就不贴代码了,详情可以到https://tushare.pro查看
model:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
def create_dataset(X, Y, window_size=30, predict_size=5):
    """Slice a series into sliding-window samples for supervised training.

    Sample ``i`` pairs the ``window_size`` consecutive rows of ``X`` starting
    at row ``i`` with the ``predict_size`` target values that immediately
    follow that window.

    Args:
        X: Array-like of features, indexed by time step along axis 0.
        Y: Array-like of targets aligned with ``X``. For a 2-D input the
            target is read from column 0; a 1-D input is used as is.
        window_size: Number of consecutive rows in each input window.
        predict_size: Number of target values following each window.

    Returns:
        A tuple ``(data_X, data_Y)`` of NumPy arrays. ``data_X`` has shape
        ``(n, window_size, ...)`` and ``data_Y`` has shape
        ``(n, predict_size)``, where
        ``n = len(X) - window_size - predict_size + 1``. When the input is
        too short to form a single sample, both arrays are empty but keep
        their trailing dimensions.
    """
    X = np.asarray(X)
    Y = np.asarray(Y)
    # Only column 0 of a 2-D Y is used as the target; also accept a plain
    # 1-D target series.
    target = Y if Y.ndim == 1 else Y[:, 0]

    n_samples = len(X) - window_size - predict_size + 1
    if n_samples <= 0:
        # Keep the trailing dimensions so callers can still read
        # shape[1] / shape[2] on an empty result.
        empty_X = np.empty((0, window_size) + X.shape[1:], dtype=X.dtype)
        empty_Y = np.empty((0, predict_size), dtype=target.dtype)
        return empty_X, empty_Y

    data_X = [X[start:start + window_size] for start in range(n_samples)]
    data_Y = [
        target[start + window_size:start + window_size + predict_size]
        for start in range(n_samples)
    ]
    return np.array(data_X), np.array(data_Y)
def minMaxScaler(df, feature_range=(0, 1)):
    """Fit a ``MinMaxScaler`` on ``df`` and return the fitted scaler.

    Args:
        df: Data to fit on. It is wrapped in a ``pandas.DataFrame`` and cast
            to float32 before fitting.
        feature_range: Target ``(min, max)`` range passed to the scaler.

    Returns:
        The fitted ``MinMaxScaler`` instance.
    """
    values = pd.DataFrame(df).values.astype("float32")
    # fit() returns the scaler itself, so it can be returned directly.
    return MinMaxScaler(feature_range=feature_range).fit(values)
def _scale_column(frame, column, feature_range=(0, 1)):
    """Min-max scale ``frame[column]`` in place and return the fitted scaler."""
    column_scaler = minMaxScaler(frame[column], feature_range=feature_range)
    scaled = column_scaler.transform(frame[[column]].values.astype("float32"))
    # The single-column input yields an (n, 1) array; flatten it before
    # writing the column back.
    frame[column] = scaled.ravel()
    return column_scaler


def _render_comparison_chart(predicted, actual, title, path):
    """Render predicted vs. actual sequences as overlaid lines to an HTML file.

    Args:
        predicted: DataFrame with a 0..n-1 index, one predicted sequence per row.
        actual: DataFrame with the same layout holding the actual values.
        title: Chart title.
        path: Output path of the rendered HTML file.
    """
    from pyecharts import options as opts
    from pyecharts.charts import Line

    chart = (
        Line(init_opts=opts.InitOpts(width="1920px", height="900px"))
        .set_global_opts(
            title_opts=opts.TitleOpts(title=title),
            yaxis_opts=opts.AxisOpts(),  # dual axis 1
            legend_opts=opts.LegendOpts(type_='scroll', pos_left="left",
                                        orient='vertical', pos_top='middle'),
            tooltip_opts=opts.TooltipOpts(trigger='axis'),
            toolbox_opts=opts.ToolboxOpts(),  # toolbox
            datazoom_opts=opts.DataZoomOpts(),  # zoom support
            xaxis_opts=opts.AxisOpts(type_="category"),  # x-axis type
        )
        .set_series_opts(label_opts=opts.LabelOpts(is_show=False))  # hide value labels
        .extend_axis(yaxis=opts.AxisOpts())
    )

    horizon = predicted.shape[1]
    for row in range(len(predicted)):
        # Row `row` is drawn starting at x = row, so consecutive rows appear
        # shifted by one step.
        x_label = list(range(row, row + horizon))
        series = (
            ('预测数据_' + str(row), predicted.iloc[row].to_list()),
            ('实际数据_' + str(row), actual.iloc[row].to_list()),
        )
        for series_name, values in series:
            rounded = [round(value, 2) for value in values]
            # The base chart has no series when set_series_opts runs above,
            # so the label option is also set on every overlaid series.
            overlay = (
                Line()
                .add_xaxis(x_label)
                .add_yaxis(series_name, rounded,
                           label_opts=opts.LabelOpts(is_show=False))
            )
            chart.overlap(overlay)
    chart.render(path)


if __name__ == '__main__':
    import joblib

    data = pd.read_csv('./data/data.csv')
    data = data.sort_values('trade_date').reset_index(drop=True)
    temp_data = data.copy()

    # ----------------------------- data preprocessing -----------------------------
    # Each column gets its own scaler; only the `close` scaler is needed
    # later to map predictions back to the original scale.
    vol_scaler = _scale_column(temp_data, 'vol')
    # Persist the fitted scaler; reload it with joblib.load('vol.save').
    joblib.dump(vol_scaler, 'vol.save')
    _scale_column(temp_data, 'turnover_rate')
    _scale_column(temp_data, 'net_mf_vol', feature_range=(-1, 1))
    close_scaler = _scale_column(temp_data, 'close')

    # ------------------------------- build datasets -------------------------------
    # NOTE(review): the test slice starts before the training slice ends,
    # presumably so the first test window has enough history -- confirm.
    train_data = temp_data[temp_data['trade_date'] <= 20191021]
    test_data = temp_data[temp_data['trade_date'] >= 20190901]

    # NOTE(review): every column of the frame is used as a feature; confirm
    # data.csv holds only numeric model inputs.
    train_X = train_data.values
    train_Y = train_data[['close']].values
    test_X = test_data.values
    test_Y = test_data[['close']].values

    window_size = 30
    predict_size = 5
    train_X, train_Y = create_dataset(train_X, train_Y,
                                      window_size=window_size,
                                      predict_size=predict_size)
    test_X, test_Y = create_dataset(test_X, test_Y,
                                    window_size=window_size,
                                    predict_size=predict_size)
    print(f'train_X.shape: {train_X.shape} train_Y.shape: {train_Y.shape} \n'
          f'test_X.shape: {test_X.shape} test_Y.shape: {test_Y.shape} ')

    # -------------------------------- build model ---------------------------------
    from keras.callbacks import EarlyStopping
    from keras.layers import LSTM, Dense
    from keras.models import Sequential

    early_stopping = EarlyStopping(monitor='loss', patience=50)
    input_shape = (train_X.shape[1], train_X.shape[2])
    output_shape = train_Y.shape[1]

    model = Sequential()
    model.add(LSTM(64, input_shape=input_shape, return_sequences=True))
    model.add(Dense(32))
    model.add(LSTM(32))
    model.add(Dense(output_shape))
    # This is a regression on the scaled close value, so report mean
    # absolute error rather than classification accuracy.
    model.compile(loss="mean_squared_error", optimizer="adam", metrics=['mae'])
    model.summary()
    model.fit(train_X, train_Y, epochs=150, batch_size=32, verbose=2,
              callbacks=[early_stopping])

    # ---------------------------------- predict -----------------------------------
    # Map predictions and targets back to the original `close` scale.
    train_Y_per = pd.DataFrame(close_scaler.inverse_transform(model.predict(train_X)))
    train_Y_con = pd.DataFrame(close_scaler.inverse_transform(train_Y))
    test_Y_per = pd.DataFrame(close_scaler.inverse_transform(model.predict(test_X)))
    test_Y_con = pd.DataFrame(close_scaler.inverse_transform(test_Y))

    # ------------------------------- plot and check -------------------------------
    # Only the last 10 samples of each set are drawn.
    draw_train_Y_per = train_Y_per[-10:].reset_index(drop=True)
    draw_train_Y_con = train_Y_con[-10:].reset_index(drop=True)
    draw_test_Y_per = test_Y_per[-10:].reset_index(drop=True)
    draw_test_Y_con = test_Y_con[-10:].reset_index(drop=True)

    _render_comparison_chart(draw_train_Y_per, draw_train_Y_con,
                             title='预测数据与实际数据对比图_训练数据',
                             path='./picture/line_train.html')
    _render_comparison_chart(draw_test_Y_per, draw_test_Y_con,
                             title='预测数据与实际数据对比图_测试数据',
                             path='./picture/line_test.html')
测试结果
训练数据测试
测试数据测试