LSTM提高精度实践-股票预测分析

爱吃芝麻汤圆

已于 2023-10-21 20:25:38 修改

阅读量792

点赞数 4

分类专栏：深度学习文章标签： lstm 机器学习人工智能数据分析深度学习神经网络 rnn

于 2023-10-21 20:23:03 首次发布

本文链接：https://blog.csdn.net/woshihlf/article/details/133962501

版权

深度学习专栏收录该内容

2 篇文章

订阅专栏

这篇文章记录的是提高LSTM精度的实践和记录

衡量标准：MSE

实践：本次我们先不提高数据质量，从改进模型参数下手。

一个完整的demo如下：

下面给出实验结果（不要看横坐标，横坐标数值错误）：

成果：

Baseline如下：

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import tushare as ts
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset
from tqdm import tqdm

# Hyper-parameters and run configuration for the baseline model.
timestep = 1  # number of past days in each input window
batch_size = 16  # mini-batch size
input_dim = 4  # features per time step: open, high, low, close
hidden_dim = 64  # size of the LSTM hidden state
output_dim = 1  # regression task, so the head emits a single value
num_layers = 3  # number of stacked LSTM layers
epochs = 10
# Start from +inf so that the first validation loss always counts as an
# improvement.  With the previous value of 0, `test_loss < best_loss` could
# never be true for a non-negative MSE, so no checkpoint was ever written.
best_loss = float('inf')
model_name = 'LSTM'
save_path = './{}.pth'.format(model_name)

# 1. Download the daily bars of the stock from tushare.
# NOTE(review): the API token is hard-coded below; consider loading it from
# configuration instead of keeping it in the source.
pro = ts.pro_api('60204767b7232ff25b16589656eb5455c83dc37056050caf960c60d8')
df = pro.daily(ts_code='000001.SZ', start_date='20130711', end_date='20220711')

# Index the frame by trade date, then flip it: the rows arrive in reverse
# order and have to be put back into chronological order.
df.index = pd.to_datetime(df.trade_date)
df = df.iloc[::-1]

# 2. Standardise the data.
# `scaler_model` scales the four model inputs; `scaler` is fitted on the
# `close` column alone (its transformed output is not used here).
ohlc_values = np.array(df[['open', 'high', 'low', 'close']]).reshape(-1, 4)
close_values = np.array(df['close']).reshape(-1, 1)

scaler = StandardScaler()
scaler_model = StandardScaler()
data = scaler_model.fit_transform(ohlc_values)
scaler.fit_transform(close_values)


def split_data(data, timestep):
    """Build sliding-window samples and split them chronologically 80/20.

    Example with ``timestep=2``: the series 1 2 3 4 5 becomes the pairs
    (1 2 -> 3), (2 3 -> 4), (3 4 -> 5).

    Args:
        data: 2-D array-like of shape (n_rows, n_features), oldest row first.
            The prediction target is column index 3 of the row that follows
            each window.
        timestep: number of consecutive rows in each input window (>= 1).

    Returns:
        ``[x_train, y_train, x_test, y_test]``; the ``x_*`` arrays have shape
        (n_samples, timestep, n_features) and the ``y_*`` arrays have shape
        (n_samples,).  The first 80% of the samples (rounded) form the
        training set.

    Raises:
        ValueError: if ``timestep`` is smaller than 1 or ``data`` has too few
            rows to form a single (window, target) pair.
    """
    data = np.asarray(data)
    if timestep < 1:
        raise ValueError("timestep must be >= 1, got {}".format(timestep))

    n_samples = len(data) - timestep
    if n_samples <= 0:
        raise ValueError(
            "need more than {} rows to build a window, got {}".format(timestep, len(data))
        )

    # Read the feature count from the data instead of hard-coding 4, so the
    # function keeps working when features are added.
    n_features = data.shape[1]

    # Each window goes to X; column 3 of the following row goes to Y.
    dataX = np.array([data[start: start + timestep] for start in range(n_samples)])
    dataY = np.array([data[start + timestep][3] for start in range(n_samples)])

    # Size of the training set: 80% of the samples, rounded.
    train_size = int(np.round(0.8 * n_samples))

    # Chronological split, no shuffling.
    x_train = dataX[:train_size].reshape(-1, timestep, n_features)
    y_train = dataY[:train_size]

    x_test = dataX[train_size:].reshape(-1, timestep, n_features)
    y_test = dataY[train_size:]

    return [x_train, y_train, x_test, y_test]


# 3. Build the windowed train/test arrays (author's note: x_train is about 1700 x 1 x 4).
x_train, y_train, x_test, y_test = split_data(data, timestep)

# 4. Convert every numpy array into a float32 tensor.
x_train_tensor, y_train_tensor, x_test_tensor, y_test_tensor = (
    torch.from_numpy(array).to(torch.float32)
    for array in (x_train, y_train, x_test, y_test)
)

# 5. Pair each input window with its target.
train_data = TensorDataset(x_train_tensor, y_train_tensor)
test_data = TensorDataset(x_test_tensor, y_test_tensor)

# 6. Batch iterators: the training set is shuffled, the test set is not.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False)


# 7. Define the LSTM network.
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear head that predicts one step ahead.

    Args:
        input_dim: number of features per time step (4 in this script).
        hidden_dim: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        output_dim: size of the prediction for each sample.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim  # hidden-state size
        self.num_layers = num_layers  # number of LSTM layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the prediction made from the last time step of every sample.

        Args:
            x: tensor of shape (batch_size, timestep, input_dim).

        Returns:
            Tensor of shape (batch_size, output_dim).
        """
        # Because of batch_first=True, `output` is laid out as
        # (batch_size, timestep, hidden_dim).
        output, _ = self.lstm(x)

        # Select the last time step per sample directly.  The previous code
        # flattened the batch-major tensor and reshaped it to
        # (timestep, batch_size, -1), which mixes samples with time steps and
        # is only correct when timestep == 1.
        last_step = output[:, -1, :]
        return self.fc(last_step)


# Instantiate the network, the MSE criterion and the Adam optimiser.
model = LSTM(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    num_layers=num_layers,
    output_dim=output_dim,
)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)
def calculate_mse(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    Thin wrapper around ``sklearn.metrics.mean_squared_error``.
    """
    mse = mean_squared_error(y_true=y_true, y_pred=y_pred)
    return mse


# 8. Train the model, validating after every epoch.
for epoch in range(epochs):
    model.train()
    train_bar = tqdm(train_loader)  # progress bar over the training batches
    for x_batch, y_batch in train_bar:
        optimizer.zero_grad()
        y_batch_pred = model(x_batch)
        loss = loss_function(y_batch_pred, y_batch.reshape(-1, 1))
        loss.backward()
        optimizer.step()

        train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                 epochs,
                                                                 loss.item())

    # Validation: average the loss over the WHOLE test set.  The previous code
    # overwrote `test_loss` on every batch, so only the last (possibly tiny)
    # batch decided whether a checkpoint was saved.
    model.eval()
    total_loss = 0.0
    n_seen = 0
    with torch.no_grad():
        for x_batch, y_batch in tqdm(test_loader):
            batch_loss = loss_function(model(x_batch), y_batch.reshape(-1, 1))
            # MSELoss returns a per-batch mean; weight it by the batch size.
            total_loss += batch_loss.item() * len(x_batch)
            n_seen += len(x_batch)
    # An empty test set never counts as an improvement.
    test_loss = total_loss / n_seen if n_seen else float('inf')

    if test_loss < best_loss:
        best_loss = test_loss
        torch.save(model.state_dict(), save_path)

# Final train/test MSE of the trained model.  It is computed once after
# training and without autograd, since only the last value is printed.
model.eval()
with torch.no_grad():
    y_train_pred = model(x_train_tensor)
    y_test_pred = model(x_test_tensor)
train__mse = calculate_mse(y_train_tensor.numpy(), y_train_pred.numpy())
test__mse = calculate_mse(y_test_tensor.numpy(), y_test_pred.numpy())

# Printed as MSE x 100 to make the values easier to compare.
print(train__mse*100)
print(test__mse*100)

用MSE乘以100，方便观察。
进行了十次测试，得到test的mse的一个平均值为0.506，我们在这个基础上进行改进。

思考方向：

1.数据质量提高
2.模型内部细节
3.模型改进和融合

1.首先思考数据清洗：

把数据随时间变化做出图，看看有没有缺失值，异常值，试试数据中心化，标准化等方式。对于图像，还可以缩放、裁剪、旋转、翻转、填充、噪声添加、灰度变换、线性变换、仿射变换、亮度、饱和度及对比度变换等。

本例中，我们的数据是从网站拿的，股票价格无缺失，异常不予考虑（没这个能力），baseline中已经标准化了，就不做这一步优化了。

2.数据集容量提高：

显而易见，会很有用。

3.模型内部超参数改变：

        batch size的大小决定了深度学习训练过程中的完成每个epoch所需的时间，以及每次迭代之间梯度的平滑程度。batch size过小，花费时间多，同时梯度震荡严重，不利于收敛；batch size过大，不同batch的梯度方向没有任何变化，容易陷入局部极小值。

        shuffle=True（每个epoch打乱训练样本的顺序）可以避免模型学到样本顺序带来的偏差，有助于提高网络的泛化性能，在batch size比较小的时候尤其有用。注意：测试集不要shuffle。

learning rate学习率，也叫步长。学习率大，则损失值易爆炸、易振荡；学习率小，则易过拟合、收敛速度慢。比较理想的做法是让学习率随训练次数衰减：在训练初期学习率比较大，随着训练的进行不断减小，直到模型收敛。
epochs，可以适当增加（小心过拟合）。

        Dropout：不会，所以我GPT了，解释如下：

        在深度学习网络的训练过程中，Dropout是一种正则化技术，它按照一定的概率将神经网络单元暂时从网络中丢弃。这个概率通常设置为0.5左右，但也可以根据具体情况进行调整。Dropout的目的是为了减轻过拟合的发生。

        Dropout的实现方式是在每次前向传播时，随机选择一部分神经网络单元，将它们的输出置为0，即丢弃它们对网络的影响。这样，每次前向传播时，网络的结构都会发生变化，从而增加了网络的泛化能力。

        Dropout并不一定要在全连接层后应用。实际上，可以在任何类型的层后应用Dropout，包括卷积层、池化层等。对于一些特定的网络结构，如卷积神经网络（CNN），一般在卷积层后应用Dropout。

        另外，Dropout也可以作为一种正则化技术来使用。正则化是一种用来防止过拟合的技术，它通过对模型的参数进行约束或惩罚，使模型更加平滑，减少对训练数据的过度拟合。Dropout可以看作是一种特殊的正则化技术，它通过对神经网络单元进行随机丢弃，达到使模型更加平滑、减少过拟合的目的。

对于LSTM，还可以选择不同的timestep，时间窗口。



4.特征工程：

选择其他有效的特征，以及增加特征（小心过拟合嗷）。

5.模型改进：

适当增加层数，减少层数。
适当改变LSTM单元数。

        加深网络层数和采用残差网络技术。

6.模型融合：

多用几个模型~~

实践：本次我们先不提高数据质量，从改进模型参数下手。

这个baseline有点简略，我们首先把训练过程作为一个函数。

这样后续可以选择训练多少轮求平均值，我们统一成，十轮求一次平均值。

    def train_save():
        """Train the global ``model`` for ``epochs`` epochs and keep the best weights.

        After every epoch the model is evaluated on ``test_loader``.  When the
        mean validation loss is lower than the module-level ``best_loss``,
        ``best_loss`` is updated and the state dict is written to ``save_path``.
        """
        global best_loss
        for epoch in range(epochs):
            model.train()
            train_bar = tqdm(train_loader)  # progress bar over the training batches
            for x_batch, y_batch in train_bar:
                optimizer.zero_grad()
                y_batch_pred = model(x_batch)
                loss = loss_function(y_batch_pred, y_batch.reshape(-1, 1))
                loss.backward()
                optimizer.step()

                train_bar.desc = "train epoch[{}/{}] loss:{:.6f}".format(epoch + 1,
                                                                         epochs,
                                                                         loss.item())

            # Validation: no parameters are updated, so gradients are not
            # needed.  The loss is averaged over the WHOLE test set; the
            # previous code overwrote `test_loss` on every batch, so only the
            # last batch was compared with `best_loss`.
            model.eval()
            total_loss = 0.0
            n_seen = 0
            with torch.no_grad():
                for x_batch, y_batch in tqdm(test_loader):
                    batch_loss = loss_function(model(x_batch), y_batch.reshape(-1, 1))
                    # MSELoss returns a per-batch mean; weight it by the batch size.
                    total_loss += batch_loss.item() * len(x_batch)
                    n_seen += len(x_batch)
            # An empty test set never counts as an improvement.
            test_loss = total_loss / n_seen if n_seen else float('inf')

            # Checkpoint whenever the validation loss improves.
            if test_loss < best_loss:
                best_loss = test_loss
                torch.save(model.state_dict(), save_path)

接下来我们把整个过程放进一个循环，用来控制超参数的更改：

import ***

# Hyper-parameters and run configuration for the sweep.
lr = 0.001  # learning rate
timestep = 1  # number of past days in each input window
batch_size = 32  # mini-batch size
input_dim = 4  # features per time step: open, high, low, close
# hidden_dim is not fixed here: it is the value swept by the `for` loop.
output_dim = 1  # regression task, so the head emits a single value
num_layers = 1  # number of stacked LSTM layers
epochs = 10
# The best validation loss is a float, not an int.  Starting from +inf makes
# the first measured loss always count as an improvement, instead of relying
# on the arbitrary sentinel 100.
best_loss: float = float('inf')
model_name = 'LSTM'
save_path = './{}.pth'.format(model_name)

# Result containers for the sweep; all four start empty.
train_mse = []
test_mse = []
train_temp_mse = []
test_temp_mse = []
# Sweep the hidden size over 80, 82, ..., 122.
for hidden_dim in range(80, 124 ,2):
    # 1. ...the rest of the pipeline goes here (elided in this excerpt)

最后写一个计算mse的函数，做出图，就可以控制超参数的修改啦！

一个完整的demo如下：

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import tushare as ts
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset
from tqdm import tqdm
from sklearn.metrics import mean_squared_error
from math import sqrt

# Hyper-parameters and run configuration for the hidden_dim sweep.
lr = 0.001  # learning rate
timestep = 1  # number of past days in each input window
batch_size = 32  # mini-batch size
input_dim = 4  # features per time step: open, high, low, close
# hidden_dim is not fixed here: it is the value swept by the `for` loop.
output_dim = 1  # regression task, so the head emits a single value
num_layers = 1  # number of stacked LSTM layers
epochs = 10
# The best validation loss is a float, not an int.  Starting from +inf makes
# the first measured loss always count as an improvement, instead of relying
# on the arbitrary sentinel 100.
best_loss: float = float('inf')
model_name = 'LSTM'
save_path = './{}.pth'.format(model_name)



# 1. Download the stock data ONCE, before the sweep.  The request does not
# depend on the swept hyper-parameter, so repeating the network call for every
# value only slowed the experiment down.
# NOTE(review): the API token is hard-coded below; consider loading it from
# configuration instead of keeping it in the source.
pro = ts.pro_api('60204767b7232ff25b16589656eb5455c83dc37056050caf960c60d8')
df = pro.daily(ts_code='000001.SZ', start_date='20130711', end_date='20220711')
df.index = pd.to_datetime(df.trade_date)  # index the frame by trade date
df = df.iloc[::-1]  # rows arrive in reverse order; restore chronological order

train_mse = []  # one averaged train MSE (x 100) per swept value
test_mse = []  # one averaged test MSE (x 100) per swept value
train_temp_mse = []  # train MSE recorded after each training round
test_temp_mse = []  # test MSE recorded after each training round
for hidden_dim in range(80, 124, 2):
    # 2. Standardise the data so every column has zero mean and unit variance.
    # After this step `data` holds only the four columns open/high/low/close.
    # The model therefore predicts standardised values, not prices; they have
    # to be inverse-transformed before being read as prices.

    # Scaler fitted on the `close` column alone.
    scaler = StandardScaler()

    # Scaler for the four model inputs.
    scaler_model = StandardScaler()

    # fit_transform first fits the scaler and then transforms the data.
    data = scaler_model.fit_transform(np.array(df[['open', 'high', 'low', 'close']]).reshape(-1, 4))

    # Fit `scaler` on `close`; the transformed output is not needed here.
    scaler.fit_transform(np.array(df['close']).reshape(-1, 1))


    def split_data(data, timestep):
        """Build sliding-window samples and split them chronologically 80/20.

        Example with ``timestep=2``: the series 1 2 3 4 5 becomes the pairs
        (1 2 -> 3), (2 3 -> 4), (3 4 -> 5).

        Args:
            data: 2-D array-like of shape (n_rows, n_features), oldest row
                first.  The prediction target is column index 3 of the row
                that follows each window.
            timestep: number of consecutive rows in each input window (>= 1).

        Returns:
            ``[x_train, y_train, x_test, y_test]``; the ``x_*`` arrays have
            shape (n_samples, timestep, n_features) and the ``y_*`` arrays
            have shape (n_samples,).  The first 80% of the samples (rounded)
            form the training set.

        Raises:
            ValueError: if ``timestep`` is smaller than 1 or ``data`` has too
                few rows to form a single (window, target) pair.
        """
        data = np.asarray(data)
        if timestep < 1:
            raise ValueError("timestep must be >= 1, got {}".format(timestep))

        n_samples = len(data) - timestep
        if n_samples <= 0:
            raise ValueError(
                "need more than {} rows to build a window, got {}".format(timestep, len(data))
            )

        # Read the feature count from the data instead of hard-coding 4, so
        # the function keeps working when features are added.
        n_features = data.shape[1]

        # Slide a window of `timestep` rows over the data: the window goes to
        # X and column 3 of the following row goes to Y.
        dataX = np.array([data[start: start + timestep] for start in range(n_samples)])
        dataY = np.array([data[start + timestep][3] for start in range(n_samples)])

        # np.round returns a float here, hence the int() conversion.
        train_size = int(np.round(0.8 * n_samples))

        # Chronological split, no shuffling.
        x_train = dataX[:train_size].reshape(-1, timestep, n_features)
        y_train = dataY[:train_size]

        x_test = dataX[train_size:].reshape(-1, timestep, n_features)
        y_test = dataY[train_size:]

        return [x_train, y_train, x_test, y_test]


    # 3. Build the windowed train/test arrays (author's note: x_train is about 1700 x 1 x 4).
    x_train, y_train, x_test, y_test = split_data(data, timestep)

    # 4. Convert every numpy array into a float32 tensor.
    x_train_tensor, y_train_tensor, x_test_tensor, y_test_tensor = (
        torch.from_numpy(array).to(torch.float32)
        for array in (x_train, y_train, x_test, y_test)
    )

    # 5. Pair each input window with its target.
    train_data = TensorDataset(x_train_tensor, y_train_tensor)
    test_data = TensorDataset(x_test_tensor, y_test_tensor)

    # 6. Batch iterators: the training set is shuffled, the test set is not.
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

    test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False)


    # 7. Define the LSTM network.
    # hidden_dim is the size of the LSTM output that feeds the fully connected layer.
    class LSTM(nn.Module):
        """Stacked LSTM followed by a linear head that predicts one step ahead.

        Args:
            input_dim: number of features per time step (4 in this script).
            hidden_dim: size of the LSTM hidden state.
            num_layers: number of stacked LSTM layers.
            output_dim: size of the prediction for each sample.
        """

        def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
            super(LSTM, self).__init__()
            self.hidden_dim = hidden_dim  # hidden-state size
            self.num_layers = num_layers  # number of LSTM layers
            self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
            self.fc = nn.Linear(hidden_dim, output_dim)

        def forward(self, x):
            """Return the prediction made from the last time step of every sample.

            Args:
                x: tensor of shape (batch_size, seq_len, input_dim), where
                    seq_len is the length of the time window.

            Returns:
                Tensor of shape (batch_size, output_dim).
            """
            # Because of batch_first=True, `output` is laid out as
            # (batch_size, seq_len, hidden_dim); the final hidden and cell
            # states are not needed.
            output, _ = self.lstm(x)

            # Select the last time step per sample directly.  The previous
            # code flattened the batch-major tensor and reshaped it to
            # (timestep, batch_size, -1), which mixes samples with time steps
            # and is only correct when timestep == 1.
            last_step = output[:, -1, :]
            return self.fc(last_step)


    # Instantiate the network, the MSE criterion and the Adam optimiser.
    model = LSTM(
        input_dim=input_dim,
        hidden_dim=hidden_dim,
        num_layers=num_layers,
        output_dim=output_dim,
    )
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)


    def train_save():
        """Train the global ``model`` for ``epochs`` epochs and keep the best weights.

        After every epoch the model is evaluated on ``test_loader``.  When the
        mean validation loss is lower than the module-level ``best_loss``,
        ``best_loss`` is updated and the state dict is written to ``save_path``.
        """
        global best_loss
        for epoch in range(epochs):
            model.train()
            train_bar = tqdm(train_loader)  # progress bar over the training batches
            for x_batch, y_batch in train_bar:
                optimizer.zero_grad()
                y_batch_pred = model(x_batch)
                loss = loss_function(y_batch_pred, y_batch.reshape(-1, 1))
                loss.backward()
                optimizer.step()

                train_bar.desc = "train epoch[{}/{}] loss:{:.6f}".format(epoch + 1,
                                                                         epochs,
                                                                         loss.item())

            # Validation: no parameters are updated, so gradients are not
            # needed.  The loss is averaged over the WHOLE test set; the
            # previous code overwrote `test_loss` on every batch, so only the
            # last batch was compared with `best_loss`.
            model.eval()
            total_loss = 0.0
            n_seen = 0
            with torch.no_grad():
                for x_batch, y_batch in tqdm(test_loader):
                    batch_loss = loss_function(model(x_batch), y_batch.reshape(-1, 1))
                    # MSELoss returns a per-batch mean; weight it by the batch size.
                    total_loss += batch_loss.item() * len(x_batch)
                    n_seen += len(x_batch)
            # An empty test set never counts as an improvement.
            test_loss = total_loss / n_seen if n_seen else float('inf')

            # Checkpoint whenever the validation loss improves.
            if test_loss < best_loss:
                best_loss = test_loss
                torch.save(model.state_dict(), save_path)


    def calculate_mse(y_true, y_pred):
        """Return the mean squared error between targets and predictions.

        Thin wrapper around ``sklearn.metrics.mean_squared_error``.
        """
        mse = mean_squared_error(y_true=y_true, y_pred=y_pred)
        return mse


    # 8. Train in ten rounds and record the train/test MSE after each round.
    # NOTE: `model` is created once per swept value, so the ten rounds continue
    # training the same network; they are not independent restarts.
    round_train_mse = []
    round_test_mse = []
    for _ in range(10):
        # Train (and checkpoint) the model for `epochs` more epochs.
        train_save()
        # Evaluation only: no autograd graph is needed.
        with torch.no_grad():
            y_train_pred = model(x_train_tensor)
            y_test_pred = model(x_test_tensor)
        train__mse = calculate_mse(y_train_tensor.numpy(), y_train_pred.numpy())
        test__mse = calculate_mse(y_test_tensor.numpy(), y_test_pred.numpy())
        round_train_mse.append(train__mse)
        round_test_mse.append(test__mse)
        # Keep the full history across the sweep in the module-level lists.
        train_temp_mse.append(train__mse)
        test_temp_mse.append(test__mse)
    # Average over THIS swept value only.  The module-level lists are never
    # reset, so averaging them mixed in the results of every earlier value.
    train_mse_average = sum(round_train_mse) / len(round_train_mse)
    test_mse_average = sum(round_test_mse) / len(round_test_mse)
    # Stored as MSE x 100 to make the values easier to compare.
    train_mse.append(train_mse_average * 100)
    test_mse.append(test_mse_average * 100)

print(train_mse)
print(test_mse)

# Plot against the swept hidden sizes instead of list indices.  The range must
# mirror the one in the sweep loop header; if the lengths disagree, fall back
# to plain indices rather than failing.
sweep_values = list(range(80, 124, 2))
if len(sweep_values) != len(train_mse):
    sweep_values = list(range(len(train_mse)))

plt.figure()
plt.plot(sweep_values, train_mse)
plt.plot(sweep_values, test_mse)
plt.legend(['train_mse', 'test_mse'])
plt.xlabel('hidden_dim')  # the swept hyper-parameter (was mislabelled 'lr')
plt.ylabel('MSE')  # the plotted values are MSE x 100
plt.title('Train and Test MSE')
plt.show()

下面给出实验结果（不要看横坐标，横坐标数值错误）：

1.对于timestep从1-5：

由此看出，在本实验的设置下，表现最好的时间窗口是1（注意：这一结论依赖于模型 forward 中对多时间步输出的取法，timestep>1 时应先确认取到的确实是每个样本最后一个时间步的输出）。

对于股票预测，只用前一天的数据预测后一天是最稳妥的。

2.对于learning rate从0.001到0.010：

3.对于learning rate从0.0001到0.0010：

由此看出，最好的学习率在0.001

4.对于batch size从16到64：

5.对于batch size从64到128：

由此看出，最好的batch size在32

6.对于hidden dim从60到140：

7.对于hidden dim从128到196：

8.对于hidden dim从256到512：

        我们发现，到目前为止，随着hidden size的增大，test mse始终在减小，说明模型始终没有收敛，也没有过拟合，但是，与此同时，运算速度在迅速下降，当hidden size在100附近时，运算速度能达到800it/s，但是当hidden size在512时，速度只有200it/s，当hidden size在1024时，速度只有40it/s。我们要合理取舍运算速度和运算精度。

        现在我们来尝试求一下hidden size最大为多少时过拟合。这是一个相当恶心人的工作，你得不断去尝试。

hidden size 600-800：

hidden size 800-1024：

hidden size 1024-1124：

        随后我又多次测量hidden size为1024的情况，得到一个平均的test_mse是0.03706。（图示中mse均乘了100）。

num layer 1-4：

最高精度较低速度：

lr = 0.001

timestep = 1

batch_size = 32

hidden_dim = 1024

num_layers = 1

epochs = 10

较高精度较高速度：

lr = 0.001

timestep = 1

batch_size = 32

hidden_dim = 512

num_layers = 1

epochs = 10

成果：

baseline：test_mse = 0.506

改进后：test_mse = 0.03706

test_mse 降为 baseline 的约 1/13.65（0.506 ÷ 0.03706 ≈ 13.65，误差减少约 92.7%）

明显可以看到，test拟合更好~

最后再给个代码：

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import tushare as ts
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset
from tqdm import tqdm
from sklearn.metrics import mean_squared_error
from math import sqrt

# Hyper-parameters and run configuration for the num_layers sweep.
lr = 0.001  # learning rate
timestep = 1  # number of past days in each input window
batch_size = 32  # mini-batch size
input_dim = 4  # features per time step: open, high, low, close
hidden_dim = 1024  # size of the LSTM hidden state
output_dim = 1  # regression task, so the head emits a single value
# num_layers is not fixed here: it is the value swept by the `for` loop.
epochs = 10
# The best validation loss is a float, not an int.  Starting from +inf makes
# the first measured loss always count as an improvement, instead of relying
# on the arbitrary sentinel 100.
best_loss: float = float('inf')
model_name = 'LSTM'
save_path = './{}.pth'.format(model_name)



# 1. Download the stock data ONCE, before the sweep.  The request does not
# depend on the swept hyper-parameter, so repeating the network call for every
# value only slowed the experiment down.
# NOTE(review): the API token is hard-coded below; consider loading it from
# configuration instead of keeping it in the source.
pro = ts.pro_api('60204767b7232ff25b16589656eb5455c83dc37056050caf960c60d8')
df = pro.daily(ts_code='000001.SZ', start_date='20130711', end_date='20220711')
df.index = pd.to_datetime(df.trade_date)  # index the frame by trade date
df = df.iloc[::-1]  # rows arrive in reverse order; restore chronological order

train_mse = []  # one averaged train MSE (x 100) per swept value
test_mse = []  # one averaged test MSE (x 100) per swept value
train_temp_mse = []  # train MSE recorded after each training round
test_temp_mse = []  # test MSE recorded after each training round
for num_layers in range(1, 2):
    # 2. Standardise the data so every column has zero mean and unit variance.
    # After this step `data` holds only the four columns open/high/low/close.
    # The model therefore predicts standardised values, not prices; they have
    # to be inverse-transformed before being read as prices.

    # Scaler fitted on the `close` column alone; it is used further down to
    # map predictions back to prices.
    scaler = StandardScaler()

    # Scaler for the four model inputs.
    scaler_model = StandardScaler()

    # fit_transform first fits the scaler and then transforms the data.
    data = scaler_model.fit_transform(np.array(df[['open', 'high', 'low', 'close']]).reshape(-1, 4))

    # Fit `scaler` on `close`; the transformed output is not needed here.
    scaler.fit_transform(np.array(df['close']).reshape(-1, 1))


    def split_data(data, timestep):
        """Build sliding-window samples and split them chronologically 80/20.

        Example with ``timestep=2``: the series 1 2 3 4 5 becomes the pairs
        (1 2 -> 3), (2 3 -> 4), (3 4 -> 5).

        Args:
            data: 2-D array-like of shape (n_rows, n_features), oldest row
                first.  The prediction target is column index 3 of the row
                that follows each window.
            timestep: number of consecutive rows in each input window (>= 1).

        Returns:
            ``[x_train, y_train, x_test, y_test]``; the ``x_*`` arrays have
            shape (n_samples, timestep, n_features) and the ``y_*`` arrays
            have shape (n_samples,).  The first 80% of the samples (rounded)
            form the training set.

        Raises:
            ValueError: if ``timestep`` is smaller than 1 or ``data`` has too
                few rows to form a single (window, target) pair.
        """
        data = np.asarray(data)
        if timestep < 1:
            raise ValueError("timestep must be >= 1, got {}".format(timestep))

        n_samples = len(data) - timestep
        if n_samples <= 0:
            raise ValueError(
                "need more than {} rows to build a window, got {}".format(timestep, len(data))
            )

        # Read the feature count from the data instead of hard-coding 4, so
        # the function keeps working when features are added.
        n_features = data.shape[1]

        # Slide a window of `timestep` rows over the data: the window goes to
        # X and column 3 of the following row goes to Y.
        dataX = np.array([data[start: start + timestep] for start in range(n_samples)])
        dataY = np.array([data[start + timestep][3] for start in range(n_samples)])

        # np.round returns a float here, hence the int() conversion.
        train_size = int(np.round(0.8 * n_samples))

        # Chronological split, no shuffling.
        x_train = dataX[:train_size].reshape(-1, timestep, n_features)
        y_train = dataY[:train_size]

        x_test = dataX[train_size:].reshape(-1, timestep, n_features)
        y_test = dataY[train_size:]

        return [x_train, y_train, x_test, y_test]


    # 3. Build the windowed train/test arrays (author's note: x_train is about 1700 x 1 x 4).
    x_train, y_train, x_test, y_test = split_data(data, timestep)

    # 4. Convert every numpy array into a float32 tensor.
    x_train_tensor, y_train_tensor, x_test_tensor, y_test_tensor = (
        torch.from_numpy(array).to(torch.float32)
        for array in (x_train, y_train, x_test, y_test)
    )

    # 5. Pair each input window with its target.
    train_data = TensorDataset(x_train_tensor, y_train_tensor)
    test_data = TensorDataset(x_test_tensor, y_test_tensor)

    # 6. Batch iterators: the training set is shuffled, the test set is not.
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

    test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False)


    # 7. Define the LSTM network.
    # hidden_dim is the size of the LSTM output that feeds the fully connected layer.
    class LSTM(nn.Module):
        """Stacked LSTM followed by a linear head that predicts one step ahead.

        Args:
            input_dim: number of features per time step (4 in this script).
            hidden_dim: size of the LSTM hidden state.
            num_layers: number of stacked LSTM layers.
            output_dim: size of the prediction for each sample.
        """

        def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
            super(LSTM, self).__init__()
            self.hidden_dim = hidden_dim  # hidden-state size
            self.num_layers = num_layers  # number of LSTM layers
            self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
            self.fc = nn.Linear(hidden_dim, output_dim)

        def forward(self, x):
            """Return the prediction made from the last time step of every sample.

            Args:
                x: tensor of shape (batch_size, seq_len, input_dim), where
                    seq_len is the length of the time window.

            Returns:
                Tensor of shape (batch_size, output_dim).
            """
            # Because of batch_first=True, `output` is laid out as
            # (batch_size, seq_len, hidden_dim); the final hidden and cell
            # states are not needed.
            output, _ = self.lstm(x)

            # Select the last time step per sample directly.  The previous
            # code flattened the batch-major tensor and reshaped it to
            # (timestep, batch_size, -1), which mixes samples with time steps
            # and is only correct when timestep == 1.
            last_step = output[:, -1, :]
            return self.fc(last_step)


    # Instantiate the network, the MSE criterion and the Adam optimiser.
    model = LSTM(
        input_dim=input_dim,
        hidden_dim=hidden_dim,
        num_layers=num_layers,
        output_dim=output_dim,
    )
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)


    def train_save():
        """Train the global ``model`` for ``epochs`` epochs and keep the best weights.

        After every epoch the model is evaluated on ``test_loader``.  When the
        mean validation loss is lower than the module-level ``best_loss``,
        ``best_loss`` is updated and the state dict is written to ``save_path``.
        """
        global best_loss
        for epoch in range(epochs):
            model.train()
            train_bar = tqdm(train_loader)  # progress bar over the training batches
            for x_batch, y_batch in train_bar:
                optimizer.zero_grad()
                y_batch_pred = model(x_batch)
                loss = loss_function(y_batch_pred, y_batch.reshape(-1, 1))
                loss.backward()
                optimizer.step()

                train_bar.desc = "train epoch[{}/{}] loss:{:.6f}".format(epoch + 1,
                                                                         epochs,
                                                                         loss.item())

            # Validation: no parameters are updated, so gradients are not
            # needed.  The loss is averaged over the WHOLE test set; the
            # previous code overwrote `test_loss` on every batch, so only the
            # last batch was compared with `best_loss`.
            model.eval()
            total_loss = 0.0
            n_seen = 0
            with torch.no_grad():
                for x_batch, y_batch in tqdm(test_loader):
                    batch_loss = loss_function(model(x_batch), y_batch.reshape(-1, 1))
                    # MSELoss returns a per-batch mean; weight it by the batch size.
                    total_loss += batch_loss.item() * len(x_batch)
                    n_seen += len(x_batch)
            # An empty test set never counts as an improvement.
            test_loss = total_loss / n_seen if n_seen else float('inf')

            # Checkpoint whenever the validation loss improves.
            if test_loss < best_loss:
                best_loss = test_loss
                torch.save(model.state_dict(), save_path)


    def calculate_mse(y_true, y_pred):
        """Return the mean squared error between targets and predictions.

        Thin wrapper around ``sklearn.metrics.mean_squared_error``.
        """
        mse = mean_squared_error(y_true=y_true, y_pred=y_pred)
        return mse


    # 8. Train in ten rounds and record the train/test MSE after each round.
    # NOTE: `model` is created once per swept value, so the ten rounds continue
    # training the same network; they are not independent restarts.
    round_train_mse = []
    round_test_mse = []
    for _ in range(10):
        # Train (and checkpoint) the model for `epochs` more epochs.
        train_save()
        # Evaluation only: no autograd graph is needed.
        with torch.no_grad():
            y_train_pred = model(x_train_tensor)
            y_test_pred = model(x_test_tensor)
        train__mse = calculate_mse(y_train_tensor.numpy(), y_train_pred.numpy())
        test__mse = calculate_mse(y_test_tensor.numpy(), y_test_pred.numpy())
        round_train_mse.append(train__mse)
        round_test_mse.append(test__mse)
        # Keep the full history across the sweep in the module-level lists.
        train_temp_mse.append(train__mse)
        test_temp_mse.append(test__mse)

    # Predictions of the trained model, used for the two plots below.
    with torch.no_grad():
        y_train_pred = model(x_train_tensor)
        y_test_pred = model(x_test_tensor)

    # Training set: predicted vs. actual close, mapped back with the
    # close-only scaler.  The curves are labelled so that plt.legend() has
    # entries to show; it was previously called without any labelled artist.
    plt.figure(figsize=(12, 8))
    plt.plot(scaler.inverse_transform(y_train_pred.numpy().reshape(-1, 1)), "b", label="predicted (train)")
    plt.plot(scaler.inverse_transform(y_train_tensor.numpy().reshape(-1, 1)), "r", label="actual (train)")
    plt.legend()
    plt.show()

    # Test set: same comparison.
    plt.figure(figsize=(12, 8))
    plt.plot(scaler.inverse_transform(y_test_pred.numpy().reshape(-1, 1)), "b", label="predicted (test)")
    plt.plot(scaler.inverse_transform(y_test_tensor.numpy().reshape(-1, 1)), "r", label="actual (test)")
    plt.legend()
    plt.show()

    # Average over THIS swept value only.  The module-level lists are never
    # reset, so averaging them mixed in the results of every earlier value.
    train_mse_average = sum(round_train_mse) / len(round_train_mse)
    test_mse_average = sum(round_test_mse) / len(round_test_mse)
    # Stored as MSE x 100 to make the values easier to compare.
    train_mse.append(train_mse_average * 100)
    test_mse.append(test_mse_average * 100)

print(train_mse)
print(test_mse)

# Plot against the swept layer counts instead of list indices.  The range must
# mirror the one in the sweep loop header; if the lengths disagree, fall back
# to plain indices rather than failing.
sweep_values = list(range(1, 2))
if len(sweep_values) != len(train_mse):
    sweep_values = list(range(len(train_mse)))

plt.figure()
plt.plot(sweep_values, train_mse)
plt.plot(sweep_values, test_mse)
plt.legend(['train_mse', 'test_mse'])
plt.xlabel('num_layers')  # the swept hyper-parameter (was mislabelled 'lr')
plt.ylabel('MSE')  # the plotted values are MSE x 100
plt.title('Train and Test MSE')
plt.show()