结合之前的两篇文章《【深度学习】RNN模型训练MNIST数据集》和《【深度学习】CNN模型处理简单时间序列》,本文构建了一个使用RNN模型处理简单时间序列的方法。思路如下:用滑动窗口将数据划分为每12个连续数据一组,将前11个数据输入RNN模型,用来预测第12个数据。目前,模型仅能大致拟合出数据的变化趋势,精度较低。
一、数据处理
1、读取EXCEL中的数据,并进行格式转换;
2、将数据划分为训练集和验证集;
3、对训练集数据进行归一化处理(缩放到[-1, 1]区间);
4、用滑动窗口对训练集数据进行分组(前11个数据作为输入,第12个数据作为标签)。
# Load column D of Sheet1 and take the series stored under the 'data' header.
Datasets = pd.read_excel(io='dataset.xlsx', sheet_name='Sheet1', usecols='D')
data = Datasets['data'].values.astype(float)

# Hold out the last 20% of the series for validation.  An explicit split index
# is used instead of negative slicing: when the hold-out size rounds down to 0,
# `data[:-0]` is empty and `data[-0:]` is the whole array.
test_size = int(len(data) * 0.2)
split_idx = len(data) - test_size
train_set = data[:split_idx]
test_set = data[split_idx:]
# Author's note for the original dataset: len(train_set) == 202, len(test_set) == 50.

# Fit the scaler on the training part only and map it to [-1, 1].
scaler = MinMaxScaler(feature_range=(-1, 1))
train_norm = scaler.fit_transform(train_set.reshape(-1, 1))
train_norm = torch.FloatTensor(train_norm).view(-1)

# Sliding windows: each sample pairs seq_size consecutive values (the input)
# with the value that immediately follows them (the label).
seq_size = 11
train_data = [
    (train_norm[i:i + seq_size], train_norm[i + seq_size])
    for i in range(len(train_norm) - seq_size)
]
二、模型定义
import torch
from torch import nn
class RNN_Model(nn.Module):
    """ReLU RNN followed by a linear layer applied to the last time step.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the RNN hidden state.
        layer_dim: Number of stacked RNN layers.
        output_dim: Size of the final linear output.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNN_Model, self).__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, nonlinearity='relu')
        # Fully connected layer mapping the hidden state to the output.
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run the RNN over ``x`` and project the last step's hidden output.

        Args:
            x: Tensor laid out as (batch, seq_len, input_dim), since the RNN
                is built with ``batch_first=True``.

        Returns:
            Tensor of shape (batch, output_dim).
        """
        # Fresh zero initial state for every call.  `new_zeros` inherits the
        # dtype and device of `x`, so the model also works after `.double()`
        # or `.to(device)`.  A newly created tensor carries no autograd
        # history, so no requires_grad_/detach dance is needed.
        h0 = x.new_zeros(self.layer_dim, x.size(0), self.hidden_dim)
        out, _ = self.rnn(x, h0)
        # Only the output of the last time step feeds the linear head.
        return self.fc(out[:, -1, :])
三、模型训练
# Hyper-parameters.
input_dim = 11  # input feature size: a whole 11-value window is one input vector
hidden_dim = 200
layer_dim = 3  # number of stacked RNN layers
output_dim = 1  # output size: the single predicted value
mod = RNN_Model(input_dim, hidden_dim, layer_dim, output_dim)
loss_fn = nn.MSELoss()
learning_rate = 0.005
optimizer = torch.optim.SGD(mod.parameters(), lr=learning_rate)
# Alternative optimizer: torch.optim.Adam(mod.parameters(), lr=learning_rate)
total_train_step = 0
epoch = 1000
loss_list = []
# NOTE(review): with sequence_dim = 1 every window is reshaped to
# (1, 1, input_dim), i.e. a single time step with 11 features, so the RNN
# never recurs over time.  Feeding (1, 11, 1) with input_dim = 1 would make it
# a true sequence model; left unchanged here because it alters the model.
sequence_dim = 1
mod.train()
for _ in range(epoch):
    # One optimizer step per (window, label) pair, i.e. batch size 1.
    for seq, y_true in train_data:
        optimizer.zero_grad()
        # Inputs are data, not parameters, so they need no gradient.
        out = mod(seq.view(-1, sequence_dim, input_dim))
        # The model returns (1, 1) while the label is 0-d; flatten both to
        # (1,) so MSELoss compares equal shapes instead of broadcasting
        # (which emits a UserWarning).
        loss = loss_fn(out.reshape(-1), y_true.reshape(-1))
        loss.backward()
        optimizer.step()
        total_train_step += 1
        # NOTE(review): the published listing lost its indentation; logging is
        # assumed to happen only inside this every-100-steps branch — confirm.
        if total_train_step % 100 == 0:
            print('训练次数:{},loss:{}'.format(total_train_step, loss.item()))
            loss_list.append(loss.item())
四、模型验证
# Seed the rolling forecast with the last seq_size normalized training values.
preds = train_norm[-seq_size:].tolist()
mod.eval()
# Forecast exactly as many steps as the hold-out set contains, so the predicted
# and true curves have the same length (the horizon used to be hard-coded).
for _ in range(len(test_set)):
    # Each prediction is appended and becomes part of the next input window.
    seq = torch.FloatTensor(preds[-seq_size:])
    with torch.no_grad():
        preds.append(mod(seq.view(-1, sequence_dim, input_dim)).item())
print(preds)
# Drop the seed values and map the forecasts back to the original data scale.
# Despite its name, `true_value` holds the de-normalized predictions.
true_value = scaler.inverse_transform(np.array(preds[seq_size:]).reshape(-1, 1))
print(true_value.tolist())
print(test_set)

# Figure 1: hold-out data versus forecast.
plt.figure(1)
plt.plot(test_set, label='true value')
plt.plot(true_value, label='predict value')
plt.legend(loc="upper left")
# Figure 2: training losses recorded in loss_list.
plt.figure(2)
plt.plot(loss_list, label='loss')
plt.show()
五、模型运行效果
损失变化情况:训练时每次只用单个样本计算损失并更新参数,因此记录到的损失波动很大,变化十分不稳定
模型验证结果
六、完整代码
import pandas as pd
import numpy as np
import torch
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from RNN_model import *
# Load column D of Sheet1 and take the series stored under the 'data' header.
Datasets = pd.read_excel(io='dataset.xlsx', sheet_name='Sheet1', usecols='D')
data = Datasets['data'].values.astype(float)

# Hold out the last 20% of the series for validation.  An explicit split index
# is used instead of negative slicing: when the hold-out size rounds down to 0,
# `data[:-0]` is empty and `data[-0:]` is the whole array.
test_size = int(len(data) * 0.2)
split_idx = len(data) - test_size
train_set = data[:split_idx]
test_set = data[split_idx:]
# Author's note for the original dataset: len(train_set) == 202, len(test_set) == 50.

# Fit the scaler on the training part only and map it to [-1, 1].
scaler = MinMaxScaler(feature_range=(-1, 1))
train_norm = scaler.fit_transform(train_set.reshape(-1, 1))
train_norm = torch.FloatTensor(train_norm).view(-1)

# Sliding windows: each sample pairs seq_size consecutive values (the input)
# with the value that immediately follows them (the label).
seq_size = 11
train_data = [
    (train_norm[i:i + seq_size], train_norm[i + seq_size])
    for i in range(len(train_norm) - seq_size)
]
# Hyper-parameters.
input_dim = 11  # input feature size: a whole 11-value window is one input vector
hidden_dim = 200
layer_dim = 3  # number of stacked RNN layers
output_dim = 1  # output size: the single predicted value
mod = RNN_Model(input_dim, hidden_dim, layer_dim, output_dim)
loss_fn = nn.MSELoss()
learning_rate = 0.005
optimizer = torch.optim.SGD(mod.parameters(), lr=learning_rate)
# Alternative optimizer: torch.optim.Adam(mod.parameters(), lr=learning_rate)
total_train_step = 0
epoch = 1000
loss_list = []
# NOTE(review): with sequence_dim = 1 every window is reshaped to
# (1, 1, input_dim), i.e. a single time step with 11 features, so the RNN
# never recurs over time.  Feeding (1, 11, 1) with input_dim = 1 would make it
# a true sequence model; left unchanged here because it alters the model.
sequence_dim = 1
mod.train()
for _ in range(epoch):
    # One optimizer step per (window, label) pair, i.e. batch size 1.
    for seq, y_true in train_data:
        optimizer.zero_grad()
        # Inputs are data, not parameters, so they need no gradient.
        out = mod(seq.view(-1, sequence_dim, input_dim))
        # The model returns (1, 1) while the label is 0-d; flatten both to
        # (1,) so MSELoss compares equal shapes instead of broadcasting
        # (which emits a UserWarning).
        loss = loss_fn(out.reshape(-1), y_true.reshape(-1))
        loss.backward()
        optimizer.step()
        total_train_step += 1
        # NOTE(review): the published listing lost its indentation; logging is
        # assumed to happen only inside this every-100-steps branch — confirm.
        if total_train_step % 100 == 0:
            print('训练次数:{},loss:{}'.format(total_train_step, loss.item()))
            loss_list.append(loss.item())
# Seed the rolling forecast with the last seq_size normalized training values.
preds = train_norm[-seq_size:].tolist()
mod.eval()
# Forecast exactly as many steps as the hold-out set contains, so the predicted
# and true curves have the same length (the horizon used to be hard-coded).
for _ in range(len(test_set)):
    # Each prediction is appended and becomes part of the next input window.
    seq = torch.FloatTensor(preds[-seq_size:])
    with torch.no_grad():
        preds.append(mod(seq.view(-1, sequence_dim, input_dim)).item())
print(preds)
# Drop the seed values and map the forecasts back to the original data scale.
# Despite its name, `true_value` holds the de-normalized predictions.
true_value = scaler.inverse_transform(np.array(preds[seq_size:]).reshape(-1, 1))
print(true_value.tolist())
print(test_set)

# Figure 1: hold-out data versus forecast.
plt.figure(1)
plt.plot(test_set, label='true value')
plt.plot(true_value, label='predict value')
plt.legend(loc="upper left")
# Figure 2: training losses recorded in loss_list.
plt.figure(2)
plt.plot(loss_list, label='loss')
plt.show()
模型定义(保存为 RNN_model.py,供上面的完整代码导入)
import torch
from torch import nn
class RNN_Model(nn.Module):
    """ReLU RNN followed by a linear layer applied to the last time step.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the RNN hidden state.
        layer_dim: Number of stacked RNN layers.
        output_dim: Size of the final linear output.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNN_Model, self).__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, nonlinearity='relu')
        # Fully connected layer mapping the hidden state to the output.
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run the RNN over ``x`` and project the last step's hidden output.

        Args:
            x: Tensor laid out as (batch, seq_len, input_dim), since the RNN
                is built with ``batch_first=True``.

        Returns:
            Tensor of shape (batch, output_dim).
        """
        # Fresh zero initial state for every call.  `new_zeros` inherits the
        # dtype and device of `x`, so the model also works after `.double()`
        # or `.to(device)`.  A newly created tensor carries no autograd
        # history, so no requires_grad_/detach dance is needed.
        h0 = x.new_zeros(self.layer_dim, x.size(0), self.hidden_dim)
        out, _ = self.rnn(x, h0)
        # Only the output of the last time step feeds the linear head.
        return self.fc(out[:, -1, :])