# Import the required libraries
import pandas as pd
import numpy as np
from numpy import log
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
# Make Chinese text and the minus sign display correctly in matplotlib
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
code = '000520'  # Stock ticker code
# Global parameters: everything that may need tuning is collected here
dim = 300  # Output dimension, i.e. the number of units in the LSTM layer
epochs = 10  # Number of training epochs (passes over the training data)
days = 5  # How many days of history feed one prediction, e.g. 20 days of history to predict the next day
batch_size = 535  # Samples processed per batch. Larger batches run faster but use more RAM/VRAM; together with the training-set size this also determines how many gradient-descent updates happen per epoch.
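# Note (added illustration, not from the original script): with N training samples,
# each epoch runs ceil(N / batch_size) gradient updates. If, say, the training split
# happens to contain about 535 windows, batch_size = 535 means a single update per
# epoch; halving the batch size would double the number of (noisier) updates.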
# Build the network
n_steps = days  # Number of time steps per input sample
n_features = 5  # Number of features per time step (open, close, high, low, daily log return)
model_2 = Sequential()
# Use ReLU as the activation function
model_2.add(LSTM(dim, activation='relu', input_shape=(n_steps, n_features)))
# The output layer is a single fully connected node
model_2.add(Dense(1))
# Choose the optimizer and loss function: RMSprop as the optimizer and mean squared error (MSE) as the loss
model_2.compile(optimizer='rmsprop', loss='mse')
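# Sketch (added for clarity, not in the original): each sample fed to this network has
# shape (n_steps, n_features) = (5, 5), i.e. 5 days of 5 features, and the model outputs
# a single value per sample. Uncomment the next line to print the layer shapes as a check.
# model_2.summary()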
######## Fetch data from tushare ########
import tushare as ts
data = ts.get_hist_data(code)
data = data.iloc[::-1]  # Reverse the DataFrame: the API returns the newest date first, but we want the newest row last
data_ = data[['open', 'close', 'high', 'low']].copy()  # .copy() avoids a SettingWithCopyWarning when adding a column below
data_['日收益率'] = log(data_['close']).diff(-1)  # Daily log return column: log(close_t) - log(close_{t+1})
data_.dropna(axis=0, inplace=True)  # diff(-1) leaves a NaN in the last row; drop it
data_.to_csv("123.csv")
# print(data_)
# ###### Prepare the data ###########
def processData(data, lb):
    """Build sliding windows of length lb from column 0 of data, plus matching labels."""
    X, Y = [], []
    for i in range(len(data) - lb - 1):
        X.append(data[i:(i + lb), 0])
        try:
            # Label is the value two steps past the end of the window when it exists...
            Y.append(data[(i + 2 + lb), 0])
        except IndexError:
            # ...otherwise fall back to the value immediately after the window
            Y.append(data[(i + lb), 0])
    return np.array(X), np.array(Y)
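# Worked example (added illustration): for a column vector
#   data = np.array([[0.1], [0.2], [0.3], [0.4], [0.5], [0.6], [0.7], [0.8]])
# and lb = 5, processData(data, lb) returns
#   X = [[0.1, 0.2, 0.3, 0.4, 0.5],   # window starting at index 0
#        [0.2, 0.3, 0.4, 0.5, 0.6]]   # window starting at index 1
#   Y = [0.8,   # data[0 + lb + 2] exists, so it is used
#        0.7]   # data[1 + lb + 2] is out of range, so it falls back to data[1 + lb]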
def pData(data, lb):
    """Build sliding windows of length lb that keep every feature column."""
    X = []
    for i in range(len(data) - lb - 1):
        X.append(data[i:(i + lb)])
    return np.array(X)
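# Note (added for clarity): pData builds the same sliding windows but keeps every feature
# column, so for an input of shape (N, n_features) it returns an array of shape
# (N - lb - 1, lb, n_features), which is exactly the input shape the LSTM layer expects.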
################
close = data_['close']
cl = np.array(close)
max_close = cl.max()  # Save the original maximum before normalisation; needed later to restore the raw scale
min_close = cl.min()  # Save the original minimum before normalisation; needed later to restore the raw scale
cl = cl.reshape(cl.shape[0], 1)
scl = MinMaxScaler()
sc2 = MinMaxScaler()
cl = scl.fit_transform(cl)
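# Note (added illustration): MinMaxScaler rescales each close price as
#   scaled = (close - min_close) / (max_close - min_close)
# mapping the series into [0, 1]; keeping max_close and min_close above is what makes it
# possible to undo this scaling by hand further down.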
# Generate the labels
_, y = processData(cl, days)
X = data_.values
# Quick visual check of the raw feature values in the final 10% of the data (the test period)
plt.plot(X[int(X.shape[0] * 0.90):].reshape(-1, 1), label='test-period raw values')
plt.legend(loc='best')
plt.show()
X = sc2.fit_transform(X)  # Normalise every feature column to [0, 1]
X = pData(X, days)  # Slice into windows of shape (days, n_features)
###### Use 90% of the data for training and 10% for testing ##########
y_train, y_test = y[:int(y.shape[0] * 0.90)], y[int(y.shape[0] * 0.90):]
x_train, x_test = X[:int(X.shape[0] * 0.90)], X[int(X.shape[0] * 0.90):]
print("++++++++++++++++++++", type(x_test))
#############
History = model_2.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
validation_data=(x_test, y_test), shuffle=False)
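# Note (added for clarity): model_2.fit returns a History object whose .history dict holds
# one 'loss' and one 'val_loss' value per epoch; those lists are what gets plotted below.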
plt.plot(History.history['loss'], label='loss')
plt.plot(History.history['val_loss'], label='val loss')
plt.legend(loc='best')
plt.title('Training and validation loss')
#plt.savefig('D:/mojin/self/try/LSTM/result/真实值与预测值的loss值.jpg')
plt.show()
#####################
# Predict every window in the test set in sequence and see how well the predictions track the true values.
y_pred = model_2.predict(x_test)
plt.figure(figsize=(8, 4))
y_test = y_test.reshape(-1, 1)
# Undo the min-max scaling to recover prices on the original scale
y_test = np.multiply(y_test, max_close - min_close)
y_test = np.add(y_test, min_close)
y_pred = np.multiply(y_pred, max_close - min_close)
y_pred = np.add(y_pred, min_close)
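# Note (added): the rescaling above is equivalent to calling scl.inverse_transform on the
# scaled arrays, since scl was fitted on the raw close prices.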
plt.plot(y_test.reshape(-1, 1), label='actual')
plt.plot(y_pred, label='predicted')
plt.title('Predictions on the test data')
# Place the R2 annotation in axes coordinates so it stays visible after rescaling the prices
plt.text(0.05, 0.90, 'Test set R2: %.3f' % r2_score(y_test.reshape(-1, 1), y_pred), transform=plt.gca().transAxes)
plt.legend(loc='best')
#plt.savefig('D:/mojin/self/try/LSTM/result/测试集的预测效果.jpg')
print("测试最值:", max_close, min_close)
plt.show();
#####
######### Predict new data ##############
a = x_test[len(x_test) - 1]  # Take the last test window as the input
Xt = model_2.predict(a.reshape(1, days, 5))  # Prediction is just a call to predict; a can be any array of this shape
print("Input window: ")
print(a)
print("Raw (scaled) prediction: ", Xt)
# Restore the value to the scale before normalisation
print("Prediction on the original price scale:", Xt[0][0] * (max_close - min_close) + min_close)
# This code was written with reference to: https://blog.csdn.net/lenglingling/article/details/103766624