-
When an RNN backpropagates through a long sequence, the gradients are very likely to vanish or explode. An LSTM mitigates this problem, so below we use an LSTM to classify the MNIST dataset.
-
This post focuses on building the RNN part; the remaining pieces are similar to the CNN setup described in the previous post.
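As a quick illustration of the vanishing-gradient claim (a toy sketch I am adding here, not part of the original script): backpropagating through many repeated tanh steps of a scalar, vanilla-RNN-style cell shrinks the input gradient roughly geometrically per step.

    import torch

    x = torch.ones(1, requires_grad=True)
    h = x
    for _ in range(100):          # 100 "time steps" of a toy recurrent cell
        h = torch.tanh(0.5 * h)   # fixed recurrent weight of 0.5
    h.backward()
    print(x.grad)                 # on the order of 1e-31: the gradient has all but vanished

Each step multiplies the gradient by roughly 0.5 * (1 - h^2) < 1, so after 100 steps almost nothing is left; LSTM gating is designed to avoid exactly this.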
🌵 Building the LSTM (important!):
class LSTM_RNN(nn.Module):
    """Build the LSTM"""
    def __init__(self):
        super(LSTM_RNN, self).__init__()
        # LSTM layer
        self.lstm = nn.LSTM(input_size=input_size,    # number of input units per time step
                            hidden_size=hidden_size,  # number of hidden units
                            num_layers=num_layers,    # number of stacked LSTM layers
                            batch_first=True)         # True: [batch, time_step, input_size]; False: [time_step, batch, input_size]
        # Output layer
        self.output_layer = nn.Linear(in_features=in_features,    # number of input features
                                      out_features=out_features)  # number of output features

    def forward(self, x):
        # x shape:        (batch, time_step, input_size)
        # lstm_out shape: (batch, time_step, hidden_size)
        # h_n shape:      (n_layers, batch, hidden_size) - final hidden state
        # c_n shape:      (n_layers, batch, hidden_size) - final cell state
        lstm_out, (h_n, c_n) = self.lstm(x, None)
        output = self.output_layer(lstm_out[:, -1, :])  # take the LSTM output at the last time step
        return output
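To make the shape comments above concrete, here is a minimal sketch (using the same values as the full script below: hidden_size=64, num_layers=1, and 28 time steps of 28 features) that runs a dummy batch through an nn.LSTM and prints the resulting shapes:

    import torch
    import torch.nn as nn

    lstm = nn.LSTM(input_size=28, hidden_size=64, num_layers=1, batch_first=True)
    x = torch.randn(5, 28, 28)            # dummy batch: (batch=5, time_step=28, input_size=28)
    lstm_out, (h_n, c_n) = lstm(x, None)  # None -> h_0 and c_0 default to zeros
    print(lstm_out.shape)                 # torch.Size([5, 28, 64]) - output at every time step
    print(h_n.shape, c_n.shape)           # torch.Size([1, 5, 64]) each - final hidden/cell state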
Full code:
"""
作者:Troublemaker
日期:2020/4/6 16:39
脚本:lstm_rnn.py
"""
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision
import matplotlib.pyplot as plt
torch.manual_seed(1)
# Hyperparameters
epochs = 2
batch_size = 64
time_step = 28      # sequence length: each 28x28 image is read as 28 rows
input_size = 28     # feature size per step: the 28 pixels of one row
learning_rate = 0.01
hidden_size = 64
num_layers = 1
# Training set
train_dataset = torchvision.datasets.MNIST(root="./mnist/", train=True, transform=torchvision.transforms.ToTensor(), download=False)
# Test set
test_dataset = torchvision.datasets.MNIST(root="./mnist/", train=False, transform=torchvision.transforms.ToTensor())
# The raw tensors are uint8 in [0, 255]; convert to float and scale to [0, 1]
# to match what ToTensor does for the training batches.
test_x = test_dataset.data.type(torch.FloatTensor)[:2000] / 255
test_y = test_dataset.targets[:2000]
# print(test_dataset.data)
# print(test_dataset.data.size())
# plt.imshow(test_dataset.data[1].numpy())
# plt.show()
# Load the training set into a DataLoader
train_loader = Data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=3)
class LSTM_RNN(nn.Module):
    """Build the LSTM network"""
    def __init__(self):
        super(LSTM_RNN, self).__init__()
        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size,  # number of hidden units
                            num_layers=num_layers,    # number of stacked LSTM layers
                            batch_first=True,         # if True, input and output tensors are (batch, seq, feature); default: False
                            )
        self.output_layer = nn.Linear(in_features=64, out_features=10)

    def forward(self, x):
        # x shape:        (batch, time_step, input_size)
        # lstm_out shape: (batch, time_step, hidden_size)
        # h_n shape:      (n_layers, batch, hidden_size) - final hidden state
        # c_n shape:      (n_layers, batch, hidden_size) - final cell state
        lstm_out, (h_n, c_n) = self.lstm(x, None)  # if (h_0, c_0) is not provided, both default to zeros
        output = self.output_layer(lstm_out[:, -1, :])  # take the LSTM output at the last time step
        return output
def main():
    lstm = LSTM_RNN()
    print(lstm)

    # Define the optimizer and loss function
    optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
    loss_function = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        print("Running epoch {}".format(epoch))
        for step, (batch_x, batch_y) in enumerate(train_loader):
            batch_x = batch_x.view(-1, 28, 28)  # reshape (batch, 1, 28, 28) to (batch, time_step, input_size)
            output = lstm(batch_x)
            loss = loss_function(output, batch_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % 50 == 0:
                with torch.no_grad():  # no gradients needed for evaluation
                    test_output = lstm(test_x)
                pred_y = torch.max(test_output, dim=1)[1].data.numpy()
                accuracy = ((pred_y == test_y.data.numpy()).astype(int).sum()) / float(test_y.size(0))
                print('Epoch: ', epoch, '| train loss: %.4f' % loss.item(), '| test accuracy: %.2f' % accuracy)

    # Print predictions for the first 10 test samples
    test_output = lstm(test_x[:10])
    pred_y = torch.max(test_output, dim=1)[1].data.numpy().squeeze()
    print(pred_y)
    print(test_y[:10])


if __name__ == "__main__":
    main()
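A side note (my addition, not from the original post): for a unidirectional LSTM, the output at the last time step is the final hidden state of the top layer, so lstm_out[:, -1, :] and h_n[-1] are interchangeable here. A quick check:

    import torch
    import torch.nn as nn

    torch.manual_seed(0)
    lstm = nn.LSTM(input_size=28, hidden_size=64, num_layers=1, batch_first=True)
    x = torch.randn(5, 28, 28)
    lstm_out, (h_n, c_n) = lstm(x, None)
    # For a unidirectional LSTM these hold the same values:
    print(torch.allclose(lstm_out[:, -1, :], h_n[-1]))  # True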
-
Performance at the start of training:
-
Performance after training:
- The final performance on this task may not match a CNN's, but the LSTM is still a solid approach.