基于pytorch的MNIST数据集的四层CNN,测试准确率99.77%
MNIST数据集
MNIST 数据集已经是一个被“嚼烂”了的数据集, 很多教程都会对它“下手”, 几乎成为一个“典范”. 不过有些人可能对它还不是很了解, 下面来介绍一下.
MNIST 数据集可在 http://yann.lecun.com/exdb/mnist/ 获取, 它包含了四个部分:
Training set images: train-images-idx3-ubyte.gz (9.9 MB, 解压后 47 MB, 包含 60,000 个样本)
Training set labels: train-labels-idx1-ubyte.gz (29 KB, 解压后 60 KB, 包含 60,000 个标签)
Test set images: t10k-images-idx3-ubyte.gz (1.6 MB, 解压后 7.8 MB, 包含 10,000 个样本)
Test set labels: t10k-labels-idx1-ubyte.gz (5 KB, 解压后 10 KB, 包含 10,000 个标签)
MNIST 数据集来自美国国家标准与技术研究所, National Institute of Standards and Technology (NIST). 训练集 (training set) 由来自 250 个不同人手写的数字构成, 其中 50% 是高中学生, 50% 来自人口普查局 (the Census Bureau) 的工作人员. 测试集(test set) 也是同样比例的手写数字数据.
环境配置
python 3.7.6,GPU版PyTorch 1.7.1,torchvision 0.8.2,CUDA 10.1
cuDNN 7.6.5
文件存储结构
1---代码文件
1---mnist 文件夹
2---MNIST 文件夹
3---processed 文件夹
4---test.pt 文件
4---training.pt 文件
3---raw 文件夹
4---t10k-images-idx3-ubyte 文件
4---t10k-labels-idx1-ubyte 文件
4---train-images-idx3-ubyte 文件
4---train-labels-idx1-ubyte 文件
代码
引入库
import torch
import torchvision
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
import matplotlib.pyplot as plt
from PIL import Image
import matplotlib.image as image
import cv2
import os
调用GPU
# Select the compute device: CUDA GPU when available, otherwise CPU.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # tolerate duplicate OpenMP runtimes (common torch/matplotlib conflict workaround)
torch.backends.cudnn.benchmark = True  # let cuDNN auto-tune conv algorithms; helps when input sizes are fixed (28x28 here)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
torch.cuda.empty_cache()  # release cached GPU memory from any previous run; harmless when CUDA was never initialized
初始化变量
# Hyperparameters and reproducibility settings.
n_epochs = 100 # number of training epochs
batch_size_train = 240 # mini-batch size for training
batch_size_test = 1000 # mini-batch size for evaluation
learning_rate = 0.001 # optimizer learning rate
momentum = 0.5 # SGD momentum: damps the oscillation of mini-batch updates for faster convergence
log_interval = 10 # how many batches between progress logs
random_seed = 2 # fixed seed so runs produce repeatable random numbers
torch.manual_seed(random_seed)  # NOTE(review): for full GPU determinism the CUDA RNG may also need seeding — confirm
导入数据集并进行数据增强
数据增强是对数据集中的图片进行平移旋转等变换。数据增强只针对训练集,使训练集的图片更具有多样性,让训练出来的模型的适应性更广。使用数据增强会使训练准确率下降,但是可以有效提高测试准确率。
# Training DataLoader with data augmentation (random shift + rotation).
# Augmentation is applied to the training set only: it slightly lowers
# training accuracy but improves generalization on the test set.
train_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST(
        './mnist/', train=True, download=True,  # FIX: download=True fetches MNIST when absent instead of raising RuntimeError
        transform=torchvision.transforms.Compose([
            torchvision.transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # random shift up to 10% in x and y
            torchvision.transforms.RandomRotation((-10, 10)),  # random rotation within +/- 10 degrees
            torchvision.transforms.ToTensor(),  # PIL image / ndarray -> FloatTensor scaled to [0, 1]
            torchvision.transforms.Normalize((0.1307,), (0.3081,)),  # MNIST global mean / std
        ])),
    batch_size=batch_size_train,
    shuffle=True,  # reshuffle the data order every epoch
    num_workers=4, pin_memory=True)  # NOTE(review): num_workers>0 needs an `if __name__ == "__main__"` guard on spawn-based platforms (Windows/macOS) — confirm target OS
导入测试集
# Test DataLoader: no augmentation, only tensor conversion and the same
# normalization statistics as the training set.
test_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST(
        './mnist/', train=False, download=True,  # FIX: download=True fetches MNIST when absent instead of raising RuntimeError
        transform=torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize((0.1307,), (0.3081,)),
        ])),
    batch_size=batch_size_test,
    shuffle=True,  # shuffling the test set does not affect accuracy; it only varies which samples are previewed below
    num_workers=4, pin_memory=True)
加载测试集
# Pull a single batch from the test loader so sample images can be inspected.
examples = enumerate(test_loader)
batch_idx, batch = next(examples)  # first batch: index 0 plus (images, labels)
example_data, example_targets = batch
# example_data has shape [1000, 1, 28, 28]: batch_size_test grayscale
# (single-channel) 28x28 images; example_targets holds their labels.
# print(example_targets)
# print(example_data.shape)
查看部分图片
# Display the first six test images together with their ground-truth labels.
fig = plt.figure()
for idx, (img, label) in enumerate(zip(example_data[:6], example_targets[:6])):
    plt.subplot(2, 3, idx + 1)  # 2x3 grid of panels
    plt.tight_layout()
    plt.imshow(img[0], cmap='gray', interpolation='none')  # channel 0 of the 1x28x28 tensor
    plt.title("Label: {}".format(label))
    plt.xticks([])  # hide axis ticks — they carry no meaning for image panels
    plt.yticks([])
plt.show()
model结构
#model
class CNNModel(nn.Module):
def __init__(self):
super(CNNModel, self).__init__()
# Convolution layer 1 ((w - f + 2 * p)/ s ) + 1
self.conv1 = nn.Conv2d(in_channels = 1 , out_channels = 32, kernel_size = 5, stride = 1, padding = 0 )
self.relu1 = nn.ReLU()
self.batch1 = nn.BatchNorm2d(32)
self.conv2 = nn.Conv2d(in_channels =32 , out_channels = 32, kernel_size = 5, stride = 1, padding = 0 )
self.relu2 = nn.ReLU()
self.batch2 = nn.BatchNorm2d(32)
self.maxpool1 = nn.MaxPool2d(kernel_size = 2, stride = 2)
self.conv1_drop = nn.Dropout(0.25)
# Convolution layer 2
self.conv3 = nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 3, stride = 1, padding = 0 )
self.relu3 = nn.ReLU()
self.batch3 = nn.BatchNorm2d(64)
self.conv4 = nn.Conv2d(in_channels = 64, out_channels = 64, kernel_size = 3, stride = 1, padding = 0 )
self.relu4 = nn.ReLU()
self.batch4 = nn.BatchNorm2d(64)
self.maxpool2 = nn.MaxPool2d(kernel_size = 2, stride = 2)
self.conv2_drop = nn.Dropout(0.25)
# Fully-Connected layer 1
self.fc1 = nn.Linear(576,256)
self.fc1_relu = nn.ReLU()
self.dp1 = nn.Dropout(0.5)
# Fully-Connected layer 2
self.fc2 = nn.Linear(256,10)
def forward(self, x):
# conv layer 1 的前向计算,3行代码
out = self.conv1(x)
out = self.relu1(out)
out = self.batch1(out)
out = self.conv2(out)
out = self.relu2(out)
out = self.batch2(out)
out = self.maxpool1(out)
out = self.conv1_drop(out)
# conv layer 2 的前向计算,4行代码
out = self.conv3(out)
out = self.relu3(out)
out = self.batch3(out)
out