1. Introduction to the MNIST Dataset
The dataset contains 60,000 training samples and 10,000 test samples across 10 classes (the digits 0-9). Every image is size-normalized to 28*28 grayscale, so there is a single color channel, and each pixel value lies in the range 0-255 (0: black; 255: white).
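A quick way to verify these properties is to peek at one raw sample. This is a minimal sketch; it assumes torchvision is installed and uses '../data/' purely as an illustrative download path.
from torchvision import datasets

raw_set = datasets.MNIST('../data/', train=True, download=True)  # no transform: samples are PIL images
img, label = raw_set[0]
print(img.size, img.mode, label)  # (28, 28) L 5  -> 28x28, mode "L" = single-channel grayscale
print(raw_set.data.min().item(), raw_set.data.max().item())  # 0 255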
2. Data Loading and Exploration
1. Data loading
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

def load_data(data_path, batch_size):
    # 1. Build the transform pipeline applied to every image
    transform = transforms.Compose([
        transforms.ToTensor(),                      # convert the PIL image to a tensor in [0, 1]
        transforms.Normalize((0.1307,), (0.3081,))  # standardize with MNIST's mean and std
    ])
    # 2. Download the datasets
    train_set = datasets.MNIST(data_path, train=True, download=True, transform=transform)
    test_set = datasets.MNIST(data_path, train=False, download=True, transform=transform)
    # 3. Wrap the datasets in batch loaders
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, pin_memory=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, pin_memory=True)  # no need to shuffle the test set
    return train_loader, test_loader
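As a sanity check, a batch drawn from the returned loader has shape (batch, 1, 28, 28); the batch size of 64 below is illustrative.
train_loader, test_loader = load_data('../data/', batch_size=64)
images, labels = next(iter(train_loader))
print(images.shape)  # torch.Size([64, 1, 28, 28]) -- after ToTensor + Normalize
print(labels.shape)  # torch.Size([64])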
2. Data exploration
# Inspect the class names
print(train_set.classes)
# ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four', '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']
# Dataset size
print(train_set.data.shape)
# torch.Size([60000, 28, 28])
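The label distribution is close to uniform but not exact. Since `train_set.targets` is a LongTensor of labels, `bincount` gives the per-class counts (the values below are the standard MNIST training counts):
print(train_set.targets.bincount())
# tensor([5923, 6742, 5958, 6131, 5842, 5421, 5918, 6265, 5851, 5949])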
3. Building the Network Models
3.1 Official implementation (from the PyTorch examples)
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout2d(0.25)  # channel-wise dropout on the conv feature maps
        self.dropout2 = nn.Dropout(0.5)     # plain dropout on the flattened features
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(self.dropout2(x))
        output = F.log_softmax(x, dim=1)  # log-probabilities; pair with F.nll_loss
        return output
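The hard-coded 9216 in fc1 comes from tracing the shapes; the dummy forward pass below is a quick smoke test of that trace.
# conv1 (3x3, stride 1): 28 -> 26, 32 channels
# conv2 (3x3, stride 1): 26 -> 24, 64 channels
# max_pool2d(2):         24 -> 12
# flatten:               64 * 12 * 12 = 9216, which matches fc1's input size
net = Net()
print(net(torch.randn(1, 1, 28, 28)).shape)  # torch.Size([1, 10])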
3.2 Custom implementation
It reaches roughly the same 99% accuracy as the official model, but cuts the running time by more than half!
class Digit(nn.Module):
    def __init__(self):
        super(Digit, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, 5)   # 1: grayscale input channel; 10: output channels; 5: kernel size
        self.conv2 = nn.Conv2d(10, 20, 3)  # 10: input channels; 20: output channels; 3: kernel size
        self.fc1 = nn.Linear(20*10*10, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        input_size = x.size(0)      # batch_size
        x = F.relu(self.conv1(x))   # batch*1*28*28 --> batch*10*24*24 (28-5+1=24)
        x = F.max_pool2d(x, 2, 2)   # batch*10*24*24 --> batch*10*12*12
        x = F.relu(self.conv2(x))   # batch*10*12*12 --> batch*20*10*10 (12-3+1=10)
        x = x.view(input_size, -1)  # flatten; -1 infers 20*10*10 = 2000
        x = F.relu(self.fc1(x))     # 2000 --> 500
        pred = F.log_softmax(self.fc2(x), dim=1)  # 500 --> 10: log-probability of each digit
        return pred
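A rough sketch of where the speed-up comes from: Digit's conv layers use far fewer channels (10/20 vs 32/64), and conv FLOPs dominate the runtime, while the parameter counts stay comparable. The totals below are computed directly from the layer shapes.
count_params = lambda m: sum(p.numel() for p in m.parameters())
print(count_params(Net()))    # 1199882 (~1.20M)
print(count_params(Digit()))  # 1007590 (~1.01M)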
3.3 Custom RNN implementation
1. RNN model definition
class RNNModel(nn.Module):
    # layer_dim: number of stacked RNN layers
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNNModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim,
                          batch_first=True, nonlinearity='relu')
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Initial hidden state: (layer_dim, batch_size, hidden_dim)
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).requires_grad_()
        # Detach the hidden state so backpropagation does not extend through it
        out, hn = self.rnn(x, h0.detach())
        # Classify from the hidden state at the last time step
        out = self.fc(out[:, -1, :])
        return out
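To feed images to this model, each 28*28 image is read as a sequence of 28 rows, one row (a 28-dim vector) per time step. A quick shape check with a dummy batch:
rnn_check = RNNModel(input_dim=28, hidden_dim=100, layer_dim=2, output_dim=10)
x = torch.randn(4, 1, 28, 28)  # a dummy batch of 4 images
x = x.view(-1, 28, 28)         # (batch, seq_len=28 rows, input_dim=28 pixels per row)
print(rnn_check(x).shape)      # torch.Size([4, 10])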
2. Model hyperparameters
input_dim = 28    # input dimension (one image row per time step)
hidden_dim = 100  # hidden state dimension
layer_dim = 2     # 2 stacked RNN layers
output_dim = 10   # output dimension (10 digit classes)
model = RNNModel(input_dim, hidden_dim, layer_dim, output_dim)
criterion = nn.CrossEntropyLoss()
lr = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
# Number of parameter tensors
print(len(list(model.parameters())))
# 10
# Size of each parameter tensor
for param in list(model.parameters()):
    print(param.size())
torch.Size([100, 28])
torch.Size([100, 100])
torch.Size([100])
torch.Size([100])
torch.Size([100, 100])
torch.Size([100, 100])
torch.Size([100])
torch.Size([100])
torch.Size([10, 100])
torch.Size([10])
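For reference, these ten tensors correspond to nn.RNN's named parameters, four per layer plus the linear head:
for name, p in model.named_parameters():
    print(name, tuple(p.shape))
# rnn.weight_ih_l0 (100, 28)   input-to-hidden weights, layer 0
# rnn.weight_hh_l0 (100, 100)  hidden-to-hidden weights, layer 0
# rnn.bias_ih_l0 (100,); rnn.bias_hh_l0 (100,)
# rnn.weight_ih_l1 (100, 100)  layer 1 consumes layer 0's 100-dim output
# rnn.weight_hh_l1 (100, 100)
# rnn.bias_ih_l1 (100,); rnn.bias_hh_l1 (100,)
# fc.weight (10, 100); fc.bias (10,)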
3.4 Custom LSTM implementation
class LSTMModel(nn.Module):
    # layer_dim: number of stacked LSTM layers
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Initial hidden state, all zeros: (layer_dim, batch_size, hidden_dim)
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).requires_grad_()
        # Initial cell state
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).requires_grad_()
        # Detach the states so backpropagation does not extend through them
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        # Only the hidden state at the last time step is needed for classification
        out = self.fc(out[:, -1, :])
        return out
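Unlike the plain RNN, each LSTM layer stacks the weights of its four gates (input, forget, cell, output), so the weight tensors have 4 * hidden_dim rows:
lstm_check = LSTMModel(input_dim=28, hidden_dim=100, layer_dim=1, output_dim=10)
for name, p in lstm_check.named_parameters():
    print(name, tuple(p.shape))
# lstm.weight_ih_l0 (400, 28)   4 gates * hidden_dim = 400 rows
# lstm.weight_hh_l0 (400, 100)
# lstm.bias_ih_l0 (400,); lstm.bias_hh_l0 (400,)
# fc.weight (10, 100); fc.bias (10,)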
4. Model Training
4.1 Training the official and Digit models
1. Training loop
def train(model, train_loader, test_loader, optimizer, epochs):
    for epoch in range(epochs):
        train_loss = train_epoch(model, train_loader, optimizer)
        test_loss, test_correct = test_epoch(model, test_loader)
        print("Epoch: {0} Train loss: {1:.4f} Test loss: {2:.4f}, Accuracy: {3:.4f}"
              .format(epoch, train_loss, test_loss, test_correct * 100.0))
2. One training epoch
def train_epoch(model, train_loader, optimizer):
    model.train()
    train_loss = 0.0
    for batch_id, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()  # clear gradients so they don't accumulate across batches
        output = model(data)
        loss = F.nll_loss(output, target)  # the models output log_softmax, so NLL is the matching loss
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_loss /= len(train_loader)  # average over batches (loss.item() is already a per-batch mean)
    return train_loss
Note that F.cross_entropy combines log_softmax and nll_loss internally, so it expects raw logits; since Net and Digit already return log-probabilities, F.nll_loss is the matching loss here. With a model that outputs raw logits you would instead write:
loss = F.cross_entropy(output, target)
test_loss += F.cross_entropy(output, target, reduction='sum').item()
3. One evaluation epoch
def test_epoch(model, test_loader):
    model.eval()
    test_correct = 0.0
    test_loss = 0.0
    with torch.no_grad():  # no gradient computation or graph building during evaluation
        for data, target in test_loader:
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum per-sample losses
            # Index of the largest log-probability = predicted class
            pred = output.argmax(dim=1, keepdim=True)
            # Accumulate the number of correct predictions
            test_correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)  # per-sample average, since losses were summed
    test_correct /= len(test_loader.dataset)
    return test_loss, test_correct
To find the index of the largest probability, any of the following works (note that torch.max returns a (values, indices) tuple):
values, pred = torch.max(output, dim=1)
pred = output.argmax(dim=1)
pred = output.argmax(dim=1, keepdim=True)
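A quick illustration on a dummy 2x10 output tensor:
output = torch.randn(2, 10)
values, indices = torch.max(output, dim=1)         # max returns both values and indices
print(torch.equal(indices, output.argmax(dim=1)))  # True -- same indices either way
print(output.argmax(dim=1, keepdim=True).shape)    # torch.Size([2, 1]) -- keeps the column dim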
4.2 Training the RNN & LSTM models
1. Training loop
def train(model, train_loader, test_loader, optimizer, criterion, epochs, sequence_dim, input_dim):
    for epoch in range(epochs):
        train_loss = train_epoch(model, train_loader, optimizer, criterion, sequence_dim, input_dim)
        test_loss, test_correct = test_epoch(model, test_loader, criterion, sequence_dim, input_dim)
        print("Epoch: {0} Train loss: {1:.4f} Test loss: {2:.4f}, Accuracy: {3:.4f}"
              .format(epoch, train_loss, test_loss, test_correct * 100.0))
2. One training epoch
def train_epoch(model, train_loader, optimizer, criterion, sequence_dim, input_dim):
    model.train()
    train_loss = 0.0
    for batch_id, (data, target) in enumerate(train_loader):
        # Reshape each image into a (sequence_dim, input_dim) sequence of rows
        data = data.view(-1, sequence_dim, input_dim)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_loss /= len(train_loader)  # average over batches (criterion reduces to a per-batch mean)
    return train_loss
3. One evaluation epoch
# Evaluation pass
def test_epoch(model, test_loader, criterion, sequence_dim, input_dim):
    model.eval()
    test_correct = 0.0
    test_loss = 0.0
    with torch.no_grad():
        for data, target in test_loader:
            data = data.view(-1, sequence_dim, input_dim)  # no gradients needed during evaluation
            output = model(data)
            test_loss += criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            test_correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader)  # average over batches (criterion reduces to a per-batch mean)
    test_correct /= len(test_loader.dataset)
    return test_loss, test_correct
5. Running the Models
5.1 Official & Digit models
from torch import optim

def main():
    EPOCHS = 3
    BATCH_SIZE = 8
    DATA_PATH = '../data/'
    MODEL_PATH = '../model/'
    model = Digit()
    optimizer = optim.Adam(model.parameters())
    train_loader, test_loader = load_data(DATA_PATH, BATCH_SIZE)
    train(model, train_loader, test_loader, optimizer, EPOCHS)
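MODEL_PATH is declared but never used above. A minimal sketch of persisting the trained weights, e.g. at the end of main() (the filename 'digit.pt' is chosen here purely for illustration):
torch.save(model.state_dict(), MODEL_PATH + 'digit.pt')
# later: model.load_state_dict(torch.load(MODEL_PATH + 'digit.pt'))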
Custom Digit results:
5.2 RNN & LSTM models
from torch.optim import SGD

def main():
    EPOCHS = 10
    BATCH_SIZE = 8
    DATA_PATH = '../data/'
    MODEL_PATH = '../model/'
    # Model hyperparameters
    input_dim = 28     # input dimension (one image row per time step)
    hidden_dim = 100   # hidden state dimension
    layer_dim = 2      # 2 stacked RNN layers
    output_dim = 10    # output dimension (10 digit classes)
    sequence_dim = 28  # sequence length (28 rows per image)
    model = RNNModel(input_dim, hidden_dim, layer_dim, output_dim)
    criterion = nn.CrossEntropyLoss()
    lr = 0.01  # note: too large a learning rate (e.g. 0.1) drives the loss to NaN
    optimizer = SGD(model.parameters(), lr=lr)
    train_loader, test_loader = load_data(DATA_PATH, BATCH_SIZE)
    train(model, train_loader, test_loader, optimizer, criterion, EPOCHS, sequence_dim, input_dim)
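Besides lowering the learning rate, gradient clipping is a common guard against exploding RNN gradients. A sketch of the change, inserted between backward() and step() in train_epoch (max_norm=1.0 is an illustrative value):
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # rescale gradients whose norm exceeds 1.0
optimizer.step()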
RNN results:
LSTM results:
A 1-layer LSTM (layer_dim = 1) performs better than 2 or 3 layers, but still falls short of Digit(); there is room for further tuning.