How does a supervised-learning LSTM time-series model actually predict? (full code attached)

I've been learning Python recently and got a supervised-learning LSTM forecasting model running; on top of the usual version, I generalized it to handle both single-step and multi-step prediction. The working code is attached below for your comments and corrections.
        Although the code runs, every predicted value lags behind the actual value, so the output looks more like a fit to the original data than a forecast. I'd like to ask:
        1. Where is the problem in the code?
        2. If the code is fine, where does the "prediction" actually happen?
        3. If there is a study group for this topic, please let me know so I can join. Thanks.
 
The code follows:

import pandas as pd
# widen pandas display limits so printed frames are not truncated with ellipses
pd.set_option('display.max_columns', 1000)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 1000)
import matplotlib.pyplot as plt
import tensorflow as tf
import os
from pandas import read_excel
import numpy as np
from pandas import DataFrame
from pandas import concat
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from numpy import concatenate
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from math import sqrt

# Parameters
start_rate = 0.99  # fraction of rows used for training
end_rate = 0.01  # fraction of rows used for testing
n_features = 21  # number of features
n_predictions = 1  # number of predicted values
delay = 5  # input window length: each sample uses the previous 5 trading days
test_trade_date = []


# Convert a string to float by dropping its trailing character (e.g. a '%' sign)
def str_to_float(s):
    s = s[:-1]
    s_float = float(s)
    return s_float
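
# Illustrative only (this helper is not called elsewhere in the script):
# str_to_float('12.3%') -> 12.3, since the trailing character is dropped before casting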


# series_to_supervised(): frame a time series as a supervised-learning dataset
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """
    Frame a time series as a supervised learning dataset.
    Arguments:
        data: Sequence of observations as a list or NumPy array.
        n_in: Number of lag observations as input (X).
        n_out: Number of observations as output (y).
        dropnan: Boolean whether or not to drop rows with NaN values.
    Returns:
        Pandas DataFrame of series framed for supervised learning.
    """
    n_vars = 1 if type(data) is list else data.shape[1]
    df = DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j + 1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j + 1, i)) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values introduced by the shifting
    if dropnan:
        agg.dropna(inplace=True)
    return agg
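
# Quick illustration of the framing (toy data, not part of the original pipeline):
# with two variables and two lag steps, each surviving row pairs the lagged
# observations with the current ones; rows whose lags are NaN are dropped.
demo = series_to_supervised(np.arange(12).reshape(6, 2), n_in=2, n_out=1)
print(demo.columns.tolist())  # ['var1(t-2)', 'var2(t-2)', 'var1(t-1)', 'var2(t-1)', 'var1(t)', 'var2(t)']
print(demo.shape)  # (4, 6): the first two rows are dropped because their lags are NaN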


def generator(tsc='000001', delay=5):
    # read the file and drop the columns we don't need
    stock_data = pd.read_csv(r"c:\python\日k线数据\%s.csv" % tsc, index_col=0)  # index_col=0 keeps the first column from being read as data
    stock_data['schange'] = stock_data['close']  # copy 'close' into a new last column that serves as the label
    stock_data.reset_index(drop=True, inplace=True)
    stock_data.drop(['ts_code', 'trade_date', 'pre_close', 'change', 'pct_chg'], axis=1, inplace=True)
    # fill missing values (backward fill first, then forward fill)
    stock_data.fillna(method='bfill', inplace=True)
    stock_data.fillna(method='ffill', inplace=True)
    # recompute the feature count from the columns that remain
    n_features = len(stock_data.columns)
    # pull the values out of the DataFrame as a NumPy array
    values = stock_data.values
    # make sure everything is float
    values = values.astype('float32')
    # normalise the features to [0, 1]
    scaler = MinMaxScaler(feature_range=(0, 1))
    # scale the whole series first, then split into train and test sets
    # (note: this lets test-set statistics leak into the scaler)
    scaled = scaler.fit_transform(values)
    row = len(scaled)
    reframed = series_to_supervised(scaled, delay, 1)
    # split into training and test sets
    values = reframed.values
    train_end = int(np.floor(start_rate * row))
    test_start = train_end
    train = values[:train_end, :]
    test = values[test_start:, :]
    # split into inputs and outputs
    n_obs = delay * n_features
    # features are the lagged columns; the label is the last column, i.e. 'schange' at time t
    train_X, train_y = train[:, :n_obs], train[:, -n_predictions]
    test_X, test_y = test[:, :n_obs], test[:, -n_predictions]

    # reshape the inputs to 3D [samples, timesteps, features]
    train_X = train_X.reshape((train_X.shape[0], delay, n_features))
    test_X = test_X.reshape((test_X.shape[0], delay, n_features))
    return train_X, train_y, test_X, test_y, scaler
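
# Why the single reshape in generator() is enough (illustrative sketch only):
# series_to_supervised lays each flattened row out time step by time step, so
# reshaping (samples, delay * n_features) to (samples, delay, n_features)
# restores the window structure.
flat = np.array([[1, 2, 3, 10, 20, 30]])  # one sample: 2 time steps x 3 features
windows = flat.reshape((flat.shape[0], 2, 3))
print(windows[0, 0])  # [ 1  2  3] -> features at the older step
print(windows[0, 1])  # [10 20 30] -> features at the newer step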


# Build the LSTM model
train_X, train_y, test_X, test_y, scaler = generator()
model = Sequential()
model.add(LSTM(20, input_shape=(train_X.shape[1], train_X.shape[2]), return_sequences=True))
model.add(LSTM(units=20))
model.add(Dropout(0.5))
model.add(Dense(1, activation='relu'))  # relu output is non-negative, which matches the [0, 1]-scaled target
model.compile(loss='mean_squared_error', optimizer='adam')
history = model.fit(train_X, train_y, epochs=500, batch_size=100, validation_data=(test_X, test_y), verbose=2,
                    shuffle=False)
model.save(r'c:\python\model\model')
# plot the training and validation loss
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.title('LSTM_000001.SZ', fontsize='12')
plt.ylabel('loss', fontsize='10')
plt.xlabel('epoch', fontsize='10')
plt.legend()
plt.show()

print("训练完成,开始预测……")
model = tf.keras.models.load_model('c:\python\model\model')
# 模型预测收益率
y_predict = model.predict(test_X)
n_features = test_X.shape[2]
# flatten the inputs back to 2D so rows can be rebuilt for inverse scaling
test_X = test_X.reshape((test_X.shape[0], test_X.shape[1] * test_X.shape[2]))

# invert the scaling on the predictions: rebuild a full-width row with the
# prediction in the label column, then let the scaler undo the normalisation
inv_y_test = concatenate((test_X[:, -n_features:-1], y_predict), axis=1)
inv_y_test = scaler.inverse_transform(inv_y_test)
inv_y_predict = inv_y_test[:, -1]

# invert the scaling on the actual values the same way
test_y = test_y.reshape((len(test_y), 1))
inv_y_actual = concatenate((test_X[:, -n_features:-1], test_y), axis=1)
inv_y_actual = scaler.inverse_transform(inv_y_actual)
inv_y = inv_y_actual[:, -1]
print('Inverse-scaled predictions:', inv_y_predict)
print('Inverse-scaled actual values:', inv_y)
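
# Why the placeholder-column trick above works (toy values, not the stock data):
# MinMaxScaler inverts each column independently, so the prediction only needs
# to occupy the same column the label came from; the filler columns are ignored.
toy = np.array([[1.0, 100.0], [2.0, 200.0], [3.0, 300.0]])
sc = MinMaxScaler().fit(toy)  # column-wise min/max
row_demo = np.array([[0.0, 0.5]])  # filler in column 1, a scaled "prediction" in column 2
print(sc.inverse_transform(row_demo)[0, -1])  # 200.0 = 100 + 0.5 * (300 - 100)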

# write the results to file
df = pd.DataFrame()
df['e'] = inv_y
df['pe'] = inv_y_predict
df.to_csv(r"c:\python\predict_result.csv")
# plot; the commented-out block below was an experiment in shifting the series by `delay` steps to check alignment
'''
inv_y=inv_y[delay:,]
#inv_y=inv_y[:-delay,]
for i in range(delay):
    #inv_y=np.concatenate((inv_y,inv_y[-1:,]) , axis=0)
    inv_y = np.concatenate((inv_y[0:1, ],inv_y), axis=0)
'''
plt.plot(inv_y, color='red', label='Original')
plt.plot(inv_y_predict, color='green', label='Predict')
plt.xlabel('test sample index')
plt.ylabel('5dayearn')
plt.title('predict')
plt.legend()
plt.show()

# Regression metrics
mse = mean_squared_error(inv_y, inv_y_predict)  # mean squared error
rmse = sqrt(mean_squared_error(inv_y, inv_y_predict))  # root mean squared error
mae = mean_absolute_error(inv_y, inv_y_predict)  # mean absolute error
r_square = r2_score(inv_y, inv_y_predict)  # coefficient of determination
print('MSE: %.6f' % mse)
print('RMSE: %.6f' % rmse)
print('MAE: %.6f' % mae)
print('R_square: %.6f' % r_square)

The quality metrics come out as follows:
MSE: 0.673632
RMSE: 0.820751
MAE: 0.770078
R_square: 0.067422

The comparison plot of the 5-day predictions against the actual values, generated by the code, is shown below:

[Figure: actual data vs. predicted data]
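
One way to probe question 2 directly is to compare the model with a naive persistence baseline that simply repeats the previous actual value; if the LSTM cannot beat it, the network is echoing the last close rather than forecasting. A hypothetical check on the arrays produced above (not part of the original script):

naive = inv_y[:-1]  # baseline: predict each value as the previous actual value
actual = inv_y[1:]
model_pred = inv_y_predict[1:]
print('naive MAE: %.6f' % mean_absolute_error(actual, naive))
print('model MAE: %.6f' % mean_absolute_error(actual, model_pred))
# if the model's MAE is no better than the baseline's, the predictions are
# effectively a lagged copy of the series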

Time-series forecasting is a common task, and the LSTM (Long Short-Term Memory) network is a recurrent neural architecture well suited to sequential data. Below is a simple, complete example of an LSTM forecaster:

```python
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import LSTM, Dense

# read the time-series data (a single-column series is assumed)
data = pd.read_csv('data.csv').values.astype('float32')

# data preprocessing: first 800 points for training, the rest for testing
train_data = data[:800]
test_data = data[800:]

# build sliding-window samples: X is a window of `look_back` points, Y is the next point
def create_dataset(dataset, look_back=1):
    X, Y = [], []
    for i in range(len(dataset) - look_back):
        X.append(dataset[i:i + look_back])
        Y.append(dataset[i + look_back])
    return np.array(X), np.array(Y)

look_back = 10  # window size
train_X, train_Y = create_dataset(train_data, look_back)
test_X, test_Y = create_dataset(test_data, look_back)

# build the LSTM model
model = Sequential()
model.add(LSTM(units=50, input_shape=(look_back, 1)))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')

# train the model
model.fit(train_X, train_Y, epochs=100, batch_size=32)

# predict on both sets
train_predict = model.predict(train_X)
test_predict = model.predict(test_X)

# visualise the results
import matplotlib.pyplot as plt
plt.plot(data, label='Original data')
plt.plot(range(look_back, look_back + len(train_predict)), train_predict, label='Train prediction')
plt.plot(range(800 + look_back, 800 + look_back + len(test_predict)), test_predict, label='Test prediction')
plt.legend()
plt.show()
```

This code implements a simple LSTM forecaster. It first reads the time-series data and splits it into training and test sets, then builds windowed inputs and labels with the `create_dataset` function. Next it assembles and compiles an LSTM model with the `Sequential` API, trains it on the training set, and finally predicts on both the training and test sets and plots the results.