# Step 1: data normalization (首先数据归一化处理)
import numpy as np
import pandas as pd

# Load the ETTh1 dataset and drop the leading timestamp column.
data = pd.read_csv('./datasets/ETTh1.csv')
columns = list(data.columns)
data = data[columns[1:]]
data_value = data.values

# Chronological 60/20/20 split: train / validation / test.
n_rows = data_value.shape[0]
train_set_size = int(0.6 * n_rows)
val_set_size = int(0.8 * n_rows)
train_data = data_value[:train_set_size]
val_data = data_value[train_set_size:val_set_size]
test_data = data_value[val_set_size:]

# Standardize every column with statistics from the TRAINING split only,
# so no information from validation/test leaks into the scaling.
train_set_mean_value = train_data.mean(axis=0)
train_set_std_value = train_data.std(axis=0)
data_value = (data_value - train_set_mean_value) / train_set_std_value

# Persist the standardized data for the modelling step.
df = pd.DataFrame(data_value, columns=columns[1:])
df.to_csv('./datasets/ETTh1_standardization1.csv', index=False)
# Step 2: load the data, split it, train the model, and predict (然后读取数据,划分数据集,训练模型,预测)
# 读取数据集,划分训练集,验证集和测试集,生成滑动窗口
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
# Prefer the GPU when one is available; otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
class RNN(nn.Module):
    """Vanilla (Elman) RNN followed by a linear head.

    The forecast is produced from the hidden state of the LAST time step,
    so the module maps a (batch, seq_len, input_size) tensor to a
    (batch, output_size) tensor.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, nonlinearity='tanh',
                          bias=True, batch_first=True, dropout=0,
                          bidirectional=False)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Zero-initialized hidden state: (num_layers, batch, hidden_size).
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                         device=x.device)
        rnn_out, _ = self.rnn(x, h0)
        # Keep only the final time step before the linear projection.
        last_step = rnn_out[:, -1, :]
        return self.fc(last_step)
def MSE(pred, true):
    """Return the mean squared error between two arrays (numpy broadcasting applies)."""
    diff = pred - true
    return np.mean(np.square(diff))
# Load the standardized dataset and keep only the target series 'OT'.
data = pd.read_csv('/kaggle/input/my-datasets/ETTh1_Standardized.csv')
data = data[['OT']]
data_value = data.values

# Chronological 60/20/20 split into train / validation / test.
train_set_size = int(0.6 * data_value.shape[0])
val_set_size = int(0.8 * data_value.shape[0])
train_data = data_value[:train_set_size]
val_data = data_value[train_set_size:val_set_size]
test_data = data_value[val_set_size:]  # final 20% held out for testing

# Each training sample is a window of the previous `seq_len` observations;
# the target is the single observation that follows the window.
seq_len = 96
training_sliding_windows = [train_data[i:i + seq_len]
                            for i in range(train_set_size - seq_len)]
x_train = np.array(training_sliding_windows)   # (n_windows, seq_len, 1)
y_train = data_value[seq_len:train_set_size]   # (n_windows, 1)
x_train = torch.from_numpy(x_train).float().to(device)
y_train = torch.from_numpy(y_train).float().to(device)
# Build the model, loss and optimiser.
model = RNN(input_size=1, hidden_size=32, num_layers=2, output_size=1).to(device)
loss_fn = torch.nn.MSELoss()
optimiser = torch.optim.Adam(model.parameters(), lr=0.01)

# Full-batch gradient descent; the loss of every epoch is recorded in `hist`.
num_epochs = 50
hist = np.zeros(num_epochs)
for epoch in range(num_epochs):
    y_train_pred = model(x_train)
    loss = loss_fn(y_train_pred, y_train)
    # Progress report every 10 epochs (skipping epoch 0).
    if epoch % 10 == 0 and epoch != 0:
        print("Epoch ", epoch, "MSE: ", loss.item())
    hist[epoch] = loss.item()
    optimiser.zero_grad()
    loss.backward()
    optimiser.step()

# Plot the per-epoch training loss.
plt.figure(figsize=(15, 6))
plt.plot(range(1, num_epochs + 1), hist, label="Training loss in each epoch")
plt.xlabel('Epoch')
plt.ylabel('Training Loss')
plt.show()
# Model inference on the test split.
# Build every sliding window up front and run ONE batched forward pass
# instead of a Python loop with a separate forward per window — identical
# outputs, far less overhead. no_grad() skips autograd bookkeeping.
test_windows = np.array([test_data[i:i + seq_len]
                         for i in range(test_data.shape[0] - seq_len)])
x_test = torch.from_numpy(test_windows).type(torch.Tensor).to(device)  # (n_windows, seq_len, 1)
with torch.no_grad():
    y_test_pred = model(x_test)  # (n_windows, 1)
prediction = y_test_pred.detach().cpu().numpy()  # 2D array (n_windows, 1)

# Visual comparison of predictions vs ground truth.
# The first `seq_len` test points have no full history window, so the
# targets start at index seq_len.
y_test = test_data[seq_len:]
plt.figure(figsize=(15, 6))
plt.plot(prediction, label='Prediction', color='green')
plt.plot(y_test, label='Ground Truth', color='red')
plt.legend()
plt.xlabel('Index')
plt.ylabel('Value')
plt.show()

# Test-set MSE.
# BUG FIX: the original called MSE(y_test_pred, y_test), where y_test_pred
# was only the LAST window's prediction (shape (1, 1)); numpy broadcasting
# then compared that single value against every target, producing a
# meaningless error. Evaluate the full prediction array instead.
Testing_Error = MSE(prediction, y_test)
print('MSE: {}'.format(Testing_Error))