利用RNN模型训练MNIST数据集,训练20次后正确率达到了98%。目前还处于RNN模型学习初级阶段,如有错误,还望海涵。
一、数据集下载与预处理
可以直接在pycharm里下载数据集,速度还挺快的。
# Load MNIST from ./data, downloading it first when it is not there yet.
# Without download=True, datasets.MNIST raises if the files are missing.
train_sets = datasets.MNIST(root='./data', train=True, download=True,
                            transform=torchvision.transforms.ToTensor())
test_sets = datasets.MNIST(root='./data', train=False, download=True,
                           transform=torchvision.transforms.ToTensor())
class_target = train_sets.classes
print(class_target)
# print(train_sets.data.shape)
# print(len(train_sets))
# print(len(test_sets))
batch_size = 32
epochs = 10
train_dataloader = DataLoader(train_sets, batch_size, shuffle=True)
# Accuracy does not depend on sample order, so the test set is not shuffled.
test_dataloader = DataLoader(test_sets, batch_size, shuffle=False)
二、模型定义
根据 PyTorch 官方文档的要求构建 RNN 模型,代码参考自网上教程。
import torch
from torch import nn
class RNN_Model(nn.Module):
    """Stacked ReLU RNN followed by a linear layer applied to the last time step.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the hidden state of each RNN layer.
        layer_dim: Number of stacked RNN layers.
        output_dim: Size of the output of the final linear layer.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, nonlinearity='relu')
        # Fully connected layer mapping the last RNN output to the output scores.
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the linear-layer output for the last time step of ``x``.

        Args:
            x: Input tensor laid out as (batch, sequence, input_dim), as
                required by ``batch_first=True``.

        Returns:
            Tensor of shape (batch, output_dim).
        """
        # A fresh all-zero initial hidden state is built on every call, so no
        # state (and no gradient) is carried over between batches. It is
        # created on the input's device and dtype so the model keeps working
        # after .to(device) or .double().
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim,
                         device=x.device, dtype=x.dtype)
        out, _ = self.rnn(x, h0)
        # Only the output of the last time step feeds the classifier.
        return self.fc(out[:, -1, :])
三、设置相关参数与模型初步训练
设置输入维度、隐藏层维度、RNN 层数、学习率等参数,并实例化模型,设置损失函数和优化器。然后按照模板进行模型训练、验证并保存。
# Model hyper-parameters
input_dim = 28  # features per time step (each image is viewed as 28 steps of 28 values)
hidden_dim = 100  # hidden state size of each RNN layer
layer_dim = 2  # number of stacked RNN layers
output_dim = 10  # number of output classes (digits 0-9)
lr = 1e-2  # learning rate
# Instantiate the model
model = RNN_Model(input_dim, hidden_dim, layer_dim, output_dim)
# Loss function
loss_fn = nn.CrossEntropyLoss()
# Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr)
sequence_dim = 28  # sequence length
loss_list = []
accuracy_list = []
iteration_list = []
iteration = 0  # global batch counter (named so it does not shadow builtin iter)
for epoch in range(epochs):
    print('-------第{}轮训练开始-------'.format(epoch + 1))
    for images, labels in train_dataloader:
        model.train()  # switch to training mode (evaluation below switches it off)
        # Reshape each batch to (batch, sequence_dim, input_dim); -1 infers the batch size.
        # The input needs no gradient: only the model parameters are optimized.
        images = images.view(-1, sequence_dim, input_dim)
        # Reset accumulated gradients
        optimizer.zero_grad()
        # Forward pass
        outputs = model(images)
        # Compute the loss
        loss = loss_fn(outputs, labels)
        # Backward pass
        loss.backward()
        # Update the parameters
        optimizer.step()
        iteration += 1
        # Evaluate on the whole test set every 500 training batches.
        if iteration % 500 == 0:
            model.eval()
            correct = 0
            total = 0
            # no_grad: evaluation needs no autograd graph, which saves memory and time.
            with torch.no_grad():
                for test_images, test_labels in test_dataloader:
                    test_images = test_images.view(-1, sequence_dim, input_dim)
                    predict = model(test_images).argmax(dim=1)
                    # Count evaluated samples and correct predictions
                    total += len(test_labels)
                    correct += (predict == test_labels).sum().item()
            accuracy = correct / total * 100
            # Store plain Python floats so the lists can be plotted directly.
            loss_list.append(loss.item())
            accuracy_list.append(accuracy)
            iteration_list.append(iteration)
            print('lope:{},loss:{}, Accuracy:{}'.format(iteration, loss.item(), accuracy))
# Persist the whole model object (pickle) once training has finished.
torch.save(model, 'trained_1.pth')
在模型训练完成后,如果想在上次训练的基础上进行训练,可以将模型实例化步骤改为模型加载
# The checkpoint holds the whole pickled model object, not just a state dict.
# Since PyTorch 2.6 torch.load defaults to weights_only=True and refuses such
# files, so full unpickling must be requested explicitly.
# NOTE: unpickling can execute arbitrary code; only load files you created.
model = torch.load('trained_1.pth', weights_only=False)
模型保存时,可以在每轮(epoch)训练完成后保存一次,避免训练中途出现问题(例如梯度异常)后前功尽弃。
# Per-epoch checkpoint: the file name carries the 1-based epoch number.
save_name = 'trained__{}.pth'.format(epoch + 1)
torch.save(model, save_name)
四、模型检验
为了亲自查看模型训练效果,可以手动构建数据,导入模型后查看输出结果。
模型检验代码如下:
import torchvision
from PIL import Image
from MNIST_Model import *
# Path of the hand-made test image
img_path = 'num6.jpg'
image = Image.open(img_path)
# Resize to 28x28, reduce to a single channel and convert to a tensor.
# NOTE(review): MNIST digits are presumably light strokes on a dark background;
# a dark-on-light photo may need inverting first - confirm with your image.
transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize((28, 28)),
    torchvision.transforms.Grayscale(num_output_channels=1),
    torchvision.transforms.ToTensor(),
])
image = transform(image)
# Load the trained model. The file holds the whole pickled model object, so
# full unpickling must be requested explicitly (PyTorch >= 2.6 defaults to
# weights_only=True). Unpickling can execute code: only load trusted files.
model = torch.load('trained__10.pth', weights_only=False)
# Reshape to (batch, sequence, features) as expected by the model.
image = image.view(-1, 28, 28)
# Run the model in evaluation mode without tracking gradients.
model.eval()
with torch.no_grad():
    output = model(image)
print(output)
# Index of the largest score along dim 1 is the predicted class.
predict = torch.max(output.data, 1)[1]
print(predict)
print(output.argmax(1))
五、代码汇总
模型训练与验证
import torch
from torch import nn
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torchvision
from MNIST_Model import *
# Load MNIST from ./data, downloading it first when it is not there yet.
# Without download=True, datasets.MNIST raises if the files are missing.
train_sets = datasets.MNIST(root='./data', train=True, download=True,
                            transform=torchvision.transforms.ToTensor())
test_sets = datasets.MNIST(root='./data', train=False, download=True,
                           transform=torchvision.transforms.ToTensor())
class_target = train_sets.classes
print(class_target)
# print(train_sets.data.shape)
# print(len(train_sets))
# print(len(test_sets))
batch_size = 32
epochs = 10
train_dataloader = DataLoader(train_sets, batch_size, shuffle=True)
# Accuracy does not depend on sample order, so the test set is not shuffled.
test_dataloader = DataLoader(test_sets, batch_size, shuffle=False)
# RNN_Model is provided by the MNIST_Model import at the top of this script.
# Model hyper-parameters
input_dim = 28  # features per time step (each image is viewed as 28 steps of 28 values)
hidden_dim = 100  # hidden state size of each RNN layer
layer_dim = 2  # number of stacked RNN layers
output_dim = 10  # number of output classes (digits 0-9)
lr = 1e-2  # learning rate
# Instantiate the model
model = RNN_Model(input_dim, hidden_dim, layer_dim, output_dim)
# Loss function
loss_fn = nn.CrossEntropyLoss()
# Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr)
sequence_dim = 28  # sequence length
loss_list = []
accuracy_list = []
iteration_list = []
iteration = 0  # global batch counter (named so it does not shadow builtin iter)
for epoch in range(epochs):
    print('-------第{}轮训练开始-------'.format(epoch + 1))
    for images, labels in train_dataloader:
        model.train()  # switch to training mode (evaluation below switches it off)
        # Reshape each batch to (batch, sequence_dim, input_dim); -1 infers the batch size.
        # The input needs no gradient: only the model parameters are optimized.
        images = images.view(-1, sequence_dim, input_dim)
        # Reset accumulated gradients
        optimizer.zero_grad()
        # Forward pass
        outputs = model(images)
        # Compute the loss
        loss = loss_fn(outputs, labels)
        # Backward pass
        loss.backward()
        # Update the parameters
        optimizer.step()
        iteration += 1
        # Evaluate on the whole test set every 500 training batches.
        if iteration % 500 == 0:
            model.eval()
            correct = 0
            total = 0
            # no_grad: evaluation needs no autograd graph, which saves memory and time.
            with torch.no_grad():
                for test_images, test_labels in test_dataloader:
                    test_images = test_images.view(-1, sequence_dim, input_dim)
                    predict = model(test_images).argmax(dim=1)
                    # Count evaluated samples and correct predictions
                    total += len(test_labels)
                    correct += (predict == test_labels).sum().item()
            accuracy = correct / total * 100
            # Store plain Python floats so the lists can be plotted directly.
            loss_list.append(loss.item())
            accuracy_list.append(accuracy)
            iteration_list.append(iteration)
            print('lope:{},loss:{}, Accuracy:{}'.format(iteration, loss.item(), accuracy))
# Persist the whole model object (pickle) once training has finished.
torch.save(model, 'trained_1.pth')
模型定义
import torch
from torch import nn
class RNN_Model(nn.Module):
    """Stacked ReLU RNN followed by a linear layer applied to the last time step.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the hidden state of each RNN layer.
        layer_dim: Number of stacked RNN layers.
        output_dim: Size of the output of the final linear layer.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, nonlinearity='relu')
        # Fully connected layer mapping the last RNN output to the output scores.
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the linear-layer output for the last time step of ``x``.

        Args:
            x: Input tensor laid out as (batch, sequence, input_dim), as
                required by ``batch_first=True``.

        Returns:
            Tensor of shape (batch, output_dim).
        """
        # A fresh all-zero initial hidden state is built on every call, so no
        # state (and no gradient) is carried over between batches. It is
        # created on the input's device and dtype so the model keeps working
        # after .to(device) or .double().
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim,
                         device=x.device, dtype=x.dtype)
        out, _ = self.rnn(x, h0)
        # Only the output of the last time step feeds the classifier.
        return self.fc(out[:, -1, :])
模型检验
import torch
import torchvision
from PIL import Image
from MNIST_Model import *
# Path of the hand-made test image
img_path = 'num6.jpg'
image = Image.open(img_path)
# Resize to 28x28, reduce to a single channel and convert to a tensor.
# NOTE(review): MNIST digits are presumably light strokes on a dark background;
# a dark-on-light photo may need inverting first - confirm with your image.
transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize((28, 28)),
    torchvision.transforms.Grayscale(num_output_channels=1),
    torchvision.transforms.ToTensor(),
])
image = transform(image)
# Load the trained model. The file holds the whole pickled model object, so
# full unpickling must be requested explicitly (PyTorch >= 2.6 defaults to
# weights_only=True). Unpickling can execute code: only load trusted files.
model = torch.load('trained__10.pth', weights_only=False)
# Reshape to (batch, sequence, features) as expected by the model.
image = image.view(-1, 28, 28)
# Run the model in evaluation mode without tracking gradients.
model.eval()
with torch.no_grad():
    output = model(image)
print(output)
# Index of the largest score along dim 1 is the predicted class.
predict = torch.max(output.data, 1)[1]
print(predict)
print(output.argmax(1))