import numpy as np # numerical arrays
import pandas as pd # tabular data handling
import matplotlib.pyplot as plt # plotting
from sklearn.preprocessing import MinMaxScaler # feature scaling to [0, 1]
from sklearn.metrics import mean_squared_error
import torch
import torch.nn as nn # PyTorch neural-network building blocks
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
import time
# Load the monitoring data (Baijiabao landslide dataset) from the first sheet.
df = pd.read_excel(r"C:\Users\tians\Desktop\白家包data.xlsx", sheet_name=0)
# Chronological train/test split (no shuffling — required for time series).
train_size = int(len(df) * 0.67)
test_size = len(df) - train_size
train, test = df[:train_size], df[train_size:]
# Hyperparameters
input_size = 3 # number of input features per time step (must match the window columns)
hidden_size = 50
n_epochs = 2000
learning_rate = 0.01 # Adam learning rate
window_size = 7
num_layers = 100 # NOTE(review): 100 stacked LSTM layers is almost certainly a typo — 1-3 is typical; confirm (a 100-layer LSTM trains extremely slowly and poorly)
# Sliding-window dataset construction
# (windows for single-step-ahead prediction)
def create_multivariate_dataset_2(dataset, window_size):
    """Build sliding-window samples for single-step multivariate forecasting.

    Each sample X[i] is a (window_size, n_features) slice of *all* columns of
    `dataset`; the target y[i] is the value of the last column at the time
    step immediately after the window.

    Args:
        dataset: pandas DataFrame whose last column is the prediction target.
        window_size: number of past time steps per sample.

    Returns:
        (X, y): FloatTensors of shape (n_samples, window_size, n_features)
        and (n_samples,).
    """
    X, y = [], []
    for i in range(len(dataset) - window_size):
        # Use every column as input features. The original `iloc[..., 1:]`
        # silently dropped the first column, yielding 2 features per step
        # while the model is constructed with input_size=3 — nn.LSTM then
        # raises "input.size(-1) must be equal to input_size".
        window = dataset.iloc[i:i + window_size, :].values
        # The label is the last column at the step right after the window.
        target = dataset.iloc[i + window_size, -1]
        X.append(window)
        y.append(target)
    X = torch.FloatTensor(np.array(X, dtype=np.float32))
    y = torch.FloatTensor(np.array(y, dtype=np.float32))
    return X, y
# Select compute device (GPU if available).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Min-max scale all columns, including the displacement label. The scaler is
# fit on the training split only and then applied to the test split, so no
# information leaks from test into train.
features = ['rainfall', 'reservoir_level', 'displacement']
# Work on explicit copies: `train`/`test` are slices (views) of `df`, and
# assigning into a slice triggers pandas' SettingWithCopyWarning and may
# silently fail to write back.
train = train.copy()
test = test.copy()
scaler = MinMaxScaler()
train[features] = scaler.fit_transform(train[features])
test[features] = scaler.transform(test[features])
# Sliding-window the scaled splits into supervised (X, y) tensors.
X_train_2, y_train_2 = create_multivariate_dataset_2(train[features], window_size)
X_test_2, y_test_2 = create_multivariate_dataset_2(test[features], window_size)
# LSTM regressor: a stacked LSTM followed by a linear head that maps the
# hidden state of the final time step to a single scalar prediction.
class MyLSTM(nn.Module):
    def __init__(self, input_dim, seq_length, hidden_size, num_layers):
        """input_dim: features per step; seq_length: kept only for signature
        compatibility (unused); hidden_size / num_layers: LSTM geometry."""
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.linear = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """x: (batch, seq, input_dim) -> prediction of shape (batch, 1)."""
        out, _ = self.lstm(x)
        last_step = out[:, -1, :]  # hidden state at the final time step
        return self.linear(last_step)
# Instantiate the model and move it to the selected device.
model = MyLSTM(input_size, window_size, hidden_size, num_layers).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
loss_fn = nn.MSELoss()
# NOTE(review): y_train_2 is 1-D, so each y_batch is (batch,) while the model
# outputs (batch, 1); MSELoss broadcasts those to (batch, batch) unless one
# side is reshaped — confirm the training loop accounts for this.
loader = DataLoader(TensorDataset(X_train_2, y_train_2), shuffle=True, batch_size=32)
# Early-stopping state: stop once train RMSE fails to improve this many
# consecutive evaluations.
early_stopping_patience = 3
early_stopping_counter = 0
best_train_rmse = float('inf')
start = time.time()  # wall-clock timer for total training time
baseline_train_losses = []
baseline_test_losses = []
# Training loop with periodic evaluation and simple early stopping.
for epoch in range(n_epochs):
    model.train()
    for X_batch, y_batch in loader:
        y_pred = model(X_batch.to(device))
        # Reshape the (batch,) targets to (batch, 1) to match the model
        # output; without this MSELoss broadcasts the pair to
        # (batch, batch) and the loss is meaningless.
        loss = loss_fn(y_pred, y_batch.unsqueeze(1).to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Evaluate and report every 50 epochs.
    if epoch % 50 == 0:
        model.eval()
        with torch.no_grad():
            y_pred_train = model(X_train_2.to(device)).cpu()
            train_rmse = np.sqrt(mean_squared_error(y_train_2, y_pred_train))
            y_pred_test = model(X_test_2.to(device)).cpu()
            test_rmse = np.sqrt(mean_squared_error(y_test_2, y_pred_test))
            # Build full-length single-column series (NaN outside the
            # predicted span) so predictions line up with the original index
            # when plotted. np.ones_like(df) made a (len, n_cols) buffer and
            # broadcast the predictions into every column.
            train_plot = np.full((len(df), 1), np.nan)
            train_plot[window_size:train_size] = y_pred_train.numpy()
            test_plot = np.full((len(df), 1), np.nan)
            # The first test prediction corresponds to index
            # train_size + window_size. The original slice
            # [train_size:len(df)] had window_size more rows than
            # predictions and raised a broadcast ValueError.
            test_plot[train_size + window_size:len(df)] = y_pred_test.numpy()
        print("Epoch %d: train RMSE %.4f, test RMSE %.4f" % (epoch, train_rmse, test_rmse))
        baseline_train_losses.append(train_rmse)
        baseline_test_losses.append(test_rmse)
        # Early-stopping bookkeeping (checked once per evaluation).
        if train_rmse < best_train_rmse:
            best_train_rmse = train_rmse
            early_stopping_counter = 0
        else:
            early_stopping_counter += 1
        if early_stopping_counter >= early_stopping_patience:
            print(
                f"Early stopping triggered after epoch {epoch}. Training RMSE did not decrease for {early_stopping_patience} consecutive epochs.")
            break
# Report total wall-clock training time.
print("Training Time: ", time.time() - start)
# Plot observed displacement against the train/test predictions from the
# last evaluation epoch.
plt.figure(figsize=(10, 5))
plt.plot(df['displacement'], c='b', label='observed')
plt.plot(train_plot, c='r', label='train prediction')
plt.plot(test_plot, c='g', label='test prediction')
plt.legend()
plt.show()  # without this, a plain (non-interactive) script exits before the figure is shown
# (forum note) Code attached above — can anyone tell what is going wrong?