MNIST手写数字识别
0. 数据集介绍
训练样本60000个,测试样本10000个,无验证集。
每个样本为一张28*28的灰度图。
1. 数据下载
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader
dataRoot = "./MNISTDATA"
trainBatchSize = 64  # mini-batch size used for training
testBatchSize = 1000  # mini-batch size used for testing

# torchvision.transforms is PyTorch's image preprocessing package. Each image
# is converted to a tensor and then normalized with mean 0.1307 / std 0.3081.
transformer = torchvision.transforms.Compose(
    [
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.1307,), (0.3081,)),
    ]
)

# train=True selects the training split; the data is downloaded into dataRoot
# when it is not already there.
trainDataLoader = DataLoader(
    torchvision.datasets.MNIST(
        dataRoot, train=True, transform=transformer, download=True
    ),
    batch_size=trainBatchSize,
    shuffle=True,
)

# train=False selects the test split.
testDataLoader = DataLoader(
    torchvision.datasets.MNIST(
        dataRoot, train=False, transform=transformer, download=True
    ),
    batch_size=testBatchSize,
    shuffle=True,
)
下载路径设置为当前路径下新建的`MNISTDATA`目录(即代码中的`dataRoot = "./MNISTDATA"`)。使用`torchvision.datasets.MNIST`对数据进行下载:当`train=True`时,表示下载训练集;当`train=False`时,表示下载测试集。`torchvision.transforms`是pytorch中的图像预处理包,在这里数据需要先转换成Tensor类型,再用`Normalize`按均值0.1307、标准差0.3081进行标准化。
2. 数据查看
以testDataLoader为例,我们先查看它的一些基本信息。
# len() of a DataLoader is the number of batches it yields.
numTestBatches = len(testDataLoader)
print(numTestBatches)  # 10

# len() of the wrapped dataset is the number of test images.
numTestImages = len(testDataLoader.dataset)
print(numTestImages)  # 10000
`len(testDataLoader)`得到的是Batch的数目,`len(testDataLoader.dataset)`得到的是测试集图片的张数。
读取第一个batch的数据(注意:下面的示例代码读取的是`trainDataLoader`的第一个batch,因此batch大小为64)。
# enumerate() yields (batch index, (images, labels)); take only the first item.
examples = enumerate(trainDataLoader)
first_batch = next(examples)
batch_idx = first_batch[0]
example_data, example_targets = first_batch[1]

# Dump the raw tensors first, then their shapes.
print("example_data:", example_data)
print("example_targets:", example_targets)
# example_data_shape: torch.Size([64, 1, 28, 28])
print("example_data_shape:", example_data.shape)
# example_targets_shape: torch.Size([64])
print("example_targets_shape:", example_targets.shape)
按batch遍历数据。
# Walk the training set one mini-batch at a time. Each item is an
# (images, labels) pair; nothing is done with it here.
for batch_idx, batch in enumerate(trainDataLoader):
    data, target = batch
可视化example_data中第0张图片
def pltv1(example_data, example_targets, index=0):
    """Show one image of a batch with its ground-truth label as the title.

    Args:
        example_data: Batch of images indexed as ``[image, channel, ...]``;
            channel 0 of the selected image is drawn.
        example_targets: Labels, indexed by image position like
            ``example_data``.
        index: Position in the batch of the image to display. Defaults to 0,
            which matches the previous hard-coded behaviour.
    """
    plt.figure()
    plt.subplot(1, 1, 1)
    # The images are grayscale, so channel 0 is the only channel.
    plt.imshow(example_data[index, 0], cmap="gray")
    plt.title("Ground Truth: {}".format(example_targets[index]))
    # Passing empty lists removes the axis ticks.
    plt.xticks([])
    plt.yticks([])
    plt.show()
3. 构建网络训练及测试
网络构建
class Net(nn.Module):
    """Small CNN mapping 1x28x28 images to 10 per-class log-probabilities.

    Three convolution + max-pool stages are followed by two fully connected
    layers. Input uses the ``[N, C, H, W]`` layout (number, channel, height,
    width).
    """

    def __init__(self):
        super().__init__()
        # kernel_size is the side length of the 2-D convolution window.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=3)
        self.conv3 = nn.Conv2d(in_channels=20, out_channels=40, kernel_size=2)
        # Dropout2d randomly zeroes whole channels while training, which
        # discourages feature maps from co-adapting.
        self.conv2_drop = nn.Dropout2d()
        self.conv3_drop = nn.Dropout2d()
        # 160 = 40 channels * 2 * 2 positions left after the third pooling
        # stage when the input is 28x28.
        self.fc1 = nn.Linear(160, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return log-probabilities of shape ``[N, 10]`` for a batch ``x``."""
        # Stage 1: convolution, 2x2 max-pool, ReLU.
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        # Stages 2 and 3 additionally apply channel dropout before pooling.
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = F.relu(F.max_pool2d(self.conv3_drop(self.conv3(x)), 2))
        # view() is the tensor counterpart of numpy's reshape; -1 lets the
        # batch dimension be inferred. The 160 must match fc1's input size.
        x = x.view(-1, 160)
        x = F.relu(self.fc1(x))
        # Functional dropout has to be told explicitly whether we are training.
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # Normalize over the class dimension. Passing dim explicitly avoids
        # the deprecated implicit-dimension behaviour (and its warning).
        return F.log_softmax(x, dim=1)
train Function
def train(epoch):
    """Train ``network`` for one pass over ``trainDataLoader``.

    Every ``log_interval`` batches the current batch loss is printed and
    recorded in ``train_losses``, the number of samples seen so far (across
    epochs) is recorded in ``train_counter``, and the model and optimizer
    state are saved to ``./model.pth`` and ``./optimizer.pth``.

    Args:
        epoch: 1-based index of the current epoch; used for logging and for
            the cumulative sample count.
    """
    network.train()
    dataset_size = len(trainDataLoader.dataset)
    num_batches = len(trainDataLoader)
    # Samples consumed before the current batch. Tracking it directly avoids
    # hard-coding the batch size and stays correct for a short last batch.
    samples_seen = 0
    for batch_idx, (data, target) in enumerate(trainDataLoader):
        # Reset gradients accumulated by the previous step.
        optimizer.zero_grad()
        # Call the module itself rather than .forward() so hooks also run.
        output = network(data)
        # Negative log likelihood loss on the log-probabilities.
        loss = F.nll_loss(output, target)
        # Compute gradients of the loss w.r.t. the graph leaves.
        loss.backward()
        # Perform a single optimization step (parameter update).
        optimizer.step()

        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tBatch idx:{}'.format(
                epoch, samples_seen, dataset_size,
                100. * batch_idx / num_batches, loss.item(), batch_idx))
            # Record the loss of the current batch.
            train_losses.append(loss.item())
            # Record how many samples had been seen at that point, counting
            # all previous epochs.
            train_counter.append(samples_seen + (epoch - 1) * dataset_size)
            torch.save(network.state_dict(), './model.pth')
            torch.save(optimizer.state_dict(), './optimizer.pth')

        samples_seen += len(data)
test Function
def test():
    """Evaluate ``network`` on ``testDataLoader``.

    Appends the average per-image loss to ``test_losses`` and prints that
    loss together with the accuracy.
    """
    # Put the module in evaluation mode.
    network.eval()
    test_loss = 0
    correct = 0
    dataset_size = len(testDataLoader.dataset)
    # Under torch.no_grad() the outputs carry no grad_fn, so no graph is
    # built and they cannot be back-propagated through.
    with torch.no_grad():
        for data, target in testDataLoader:
            # Call the module itself rather than .forward() so hooks also run.
            output = network(data)
            # Sum (not average) the loss over the batch; the division by the
            # dataset size happens once below. reduction='sum' replaces the
            # deprecated size_average=False.
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # Predicted class is the index of the largest log-probability.
            pred = output.argmax(dim=1, keepdim=True)
            # Count matches as a plain Python int.
            correct += pred.eq(target.view_as(pred)).sum().item()
    # Average loss per image.
    test_loss /= dataset_size
    test_losses.append(test_loss)
    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, dataset_size, 100. * correct / dataset_size))
4. 执行训练及测试
Note:此训练测试是在CPU上进行的。
n_epoch = 3  # number of passes over the training data (one full pass = one epoch)
learningRate = 0.01  # learning rate passed to SGD
momentum = 0.5  # momentum factor passed to SGD
log_interval = 10  # train() logs and checkpoints every this many batches
random_seed = 1  # seed for torch's random number generator
torch.manual_seed(random_seed)

network = Net()
# Print the model structure.
print(network)
# Optimize with stochastic gradient descent.
optimizer = optim.SGD(network.parameters(), lr=learningRate, momentum=momentum)

# Filled in by train() and test().
train_losses = []
train_counter = []
test_losses = []
# NOTE(review): test_counter has n_epoch + 1 entries, but the loop below calls
# test() only n_epoch times, so test_losses ends up one entry shorter. If the
# two are plotted against each other, call test() once before training.
test_counter = [i * len(trainDataLoader.dataset) for i in range(n_epoch + 1)]

# Train, then evaluate, once per epoch.
for epoch in range(1, n_epoch + 1):
    train(epoch)
    test()

# Predict on one batch.
# NOTE(review): the batch is drawn from trainDataLoader, so the count printed
# below is measured on training data, not on the test set.
examples = enumerate(trainDataLoader)
# example_data shape: torch.Size([64, 1, 28, 28])
batch_idx, (example_data, example_targets) = next(examples)
network.eval()
# Predict all 64 images at once. No gradients are needed for inference.
with torch.no_grad():
    output = network(example_data)
pred = output.argmax(dim=1, keepdim=True)
correct = pred.eq(example_targets.view_as(pred)).sum().item()
print("pred:", pred)
print("groundTruth:", example_targets)
print("正确个数:{}".format(correct))
结果输出
Test set: Avg. loss: 0.1682, Accuracy: 9503/10000 (95%)
...
groundTruth: tensor([8, 5, 8, 4, 8, 0, 0, 8, 7, 4, 6, 0, 7, 6, 5, 1, 7, 2, 6, 1, 5, 1, 9, 9, 6, 9, 2, 2, 8, 0, 1, 3, 6, 3, 7, 7, 3, 8, 4, 8, 0, 6, 7, 3, 1, 7, 1, 8, 6, 1, 9, 1, 1, 5, 2, 9, 3, 3, 6, 5, 1, 1, 1, 8])
正确个数:60