MNIST数据集是一个包含60000张训练图片,10000张测试图片的手写数字数据集,包含0~9十个类别的数字,且全部为单色图片。
图片来源于百度
解决问题的整体思路:
- 下载并加载数据,并做出一定的预先处理
pipline1 = transforms.Compose([ #随机水平翻转图片 transforms.RandomHorizontalFlip(), #将图片转化为Tensor格式 transforms.ToTensor(), #标准化(用MNIST的均值0.1307和标准差0.3081,把输入调整为均值0、方差1,加快收敛) transforms.Normalize((0.1307),(0.3081)) ]) pipline2 = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.1307),(0.3081)) ]) #下载数据集 train_set = datasets.MNIST(root="./data", train=True, download=True, transform=pipline1) test_set = datasets.MNIST(root="./data", train=False, download=True, transform=pipline2) #加载数据集 trainloader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True) testloader = torch.utils.data.DataLoader(test_set, batch_size=32, shuffle=False)
shuffle指的是是否对加载的数据集进行打乱,通常对训练集需要进行使用。
RandomHorizontalFlip()是随机水平翻转(不是旋转);在旧版torchvision中它只接受PIL图片输入,因此需要定义在ToTensor()之前(较新版本的torchvision也支持Tensor输入)。
- 搭建LeNet神经网络结构,并定义前向传播的过程
class LeNet(nn.Module): def __init__(self): super(LeNet, self).__init__() self.conv1 = nn.Conv2d(1, 6, 5) self.relu = nn.ReLU() self.maxpool1 = nn.MaxPool2d(2, 2) self.conv2 = nn.Conv2d(6, 16, 5) self.maxpool2 = nn.MaxPool2d(2, 2) self.fc1 = nn.Linear(16*4*4, 120) self.fc2 = nn.Linear(120, 84) self.fc3 = nn.Linear(84, 10) def forward(self, x): x = self.conv1(x) x = self.relu(x) x = self.maxpool1(x) x = self.conv2(x) x = self.maxpool2(x) x = x.view(-1, 16*4*4) x = F.relu(self.fc1(x)) x = F.relu(self.fc2(x)) x = self.fc3(x) output = F.log_softmax(x, dim=1) return output
nn.Module继承父类,用以定义神经网络结构。
神经网络结构:
LeNet(
(conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
(relu): ReLU()
(maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
(maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(fc1): Linear(in_features=256, out_features=120, bias=True)
(fc2): Linear(in_features=120, out_features=84, bias=True)
(fc3): Linear(in_features=84, out_features=10, bias=True)
)
dim=0表示二维矩阵中的行,dim=1表示二维矩阵中的列。
softmax函数可以将输入映射为0-1之间的实数,并且归一化保证和为1,因此多分类的概率之和也刚好为1。
而log_softmax则是对softmax取对数,相较softmax更稳定,降低溢出。
- 将定义好的网络结构搭载到gpu,并定义优化器
#创建模型,部署gpu model = LeNet().to(device) #定义优化器 optimizer = optim.Adam(model.parameters())
- 定义训练过程
def train_runner(model, device, trainloader, optimizer, epoch): #训练模型, 启用 BatchNormalization 和 Dropout, 将BatchNormalization和Dropout置为True model.train() total = 0 correct =0.0 #enumerate迭代已加载的数据集,同时获取数据和数据下标 for i, data in enumerate(trainloader, 0): inputs, labels = data #把模型部署到device上 inputs, labels = inputs.to(device), labels.to(device) #初始化梯度 optimizer.zero_grad() #保存训练结果 outputs = model(inputs) #计算损失和 #多分类情况通常使用cross_entropy(交叉熵损失函数), 而对于二分类问题, 通常使用sigmod loss = F.cross_entropy(outputs, labels) #获取最大概率的预测结果 #dim=1表示返回每一行的最大值对应的列下标 predict = outputs.argmax(dim=1) total += labels.size(0) correct += (predict == labels).sum().item() #反向传播 loss.backward() #更新参数 optimizer.step() if i % 1000 == 0: #loss.item()表示当前loss的数值 print("Train Epoch{} \t Loss: {:.6f}, accuracy: {:.6f}%".format(epoch, loss.item(), 100*(correct/total)))
- 定义测试过程
def test_runner(model, device, testloader): #模型验证, 必须要写, 否则只要有输入数据, 即使不训练, 它也会改变权值 #因为调用eval()将不启用 BatchNormalization 和 Dropout, BatchNormalization和Dropout置为False model.eval() #统计模型正确率, 设置初始值 correct = 0.0 test_loss = 0.0 total = 0 #torch.no_grad将不会计算梯度, 也不会进行反向传播 with torch.no_grad(): for data, label in testloader: data, label = data.to(device), label.to(device) output = model(data) test_loss += F.cross_entropy(output, label).item() predict = output.argmax(dim=1) #计算正确数量 total += label.size(0) correct += (predict == label).sum().item() #计算损失值 print("test_avarage_loss: {:.6f}, accuracy: {:.6f}%".format(test_loss/total, 100*(correct/total)))
- 运行程序
完整代码:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import time
# Select the GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Total number of training epochs (consumed by the final training loop).
epoch = 5
#Pre-processing pipeline for the TRAINING images.
pipline1 = transforms.Compose([
#Randomly flip images horizontally (a flip, not a rotation).
#NOTE(review): horizontal flips are dubious augmentation for digits
#(a mirrored "3" is not a valid digit) -- confirm this is intended.
transforms.RandomHorizontalFlip(),
#Convert PIL images to float tensors scaled to [0, 1].
transforms.ToTensor(),
#Standardize with MNIST's mean (0.1307) and std (0.3081); this is input
#normalization, not a regularizer.
transforms.Normalize((0.1307),(0.3081))
])
#Test pipeline: no augmentation, only tensor conversion + normalization.
pipline2 = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307),(0.3081))
])
#Download the MNIST datasets (cached under ./data after the first run).
train_set = datasets.MNIST(root="./data", train=True, download=True, transform=pipline1)
test_set = datasets.MNIST(root="./data", train=False, download=True, transform=pipline2)
#Wrap the datasets in loaders; only the training data is shuffled.
trainloader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
testloader = torch.utils.data.DataLoader(test_set, batch_size=32, shuffle=False)
#构建LeNet网络模型
class LeNet(nn.Module):
    """LeNet-style CNN for single-channel 28x28 MNIST digits.

    Input:  (N, 1, 28, 28) float tensor.
    Output: (N, 10) log-probabilities (log_softmax over the 10 classes).
    """

    def __init__(self):
        super(LeNet, self).__init__()
        # 28x28 -> conv(5x5) -> 24x24, 6 channels
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.relu = nn.ReLU()
        # 24x24 -> maxpool(2) -> 12x12
        self.maxpool1 = nn.MaxPool2d(2, 2)
        # 12x12 -> conv(5x5) -> 8x8, 16 channels
        self.conv2 = nn.Conv2d(6, 16, 5)
        # 8x8 -> maxpool(2) -> 4x4, hence the 16*4*4 = 256 flatten below
        self.maxpool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(16*4*4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.maxpool1(self.relu(self.conv1(x)))
        # Fix: the original applied no activation after conv2, leaving the
        # second conv layer without a non-linearity; canonical LeNet
        # activates after every conv layer.
        x = self.maxpool2(self.relu(self.conv2(x)))
        # Flatten to (N, 256) for the fully-connected head.
        x = x.view(-1, 16*4*4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        # log_softmax pairs with nll_loss. NOTE(review): the training loop
        # uses F.cross_entropy, which normalizes again; numerically this
        # yields the same loss (log_softmax is shift-invariantly idempotent)
        # but one of the two is redundant.
        output = F.log_softmax(x, dim=1)
        return output
#Create the model and move it to the selected device (GPU when available).
model = LeNet().to(device)
#Adam optimizer with its default learning rate (1e-3).
optimizer = optim.Adam(model.parameters())
#训练
def train_runner(model, device, trainloader, optimizer, epoch):
    """Run one training epoch, printing running loss and accuracy.

    Args:
        model: network to train (modified in place).
        device: torch.device the batches are moved to.
        trainloader: iterable of (inputs, labels) batches.
        optimizer: optimizer stepping the model's parameters.
        epoch: epoch number, used only for the progress printout.
    """
    # Training mode: enables Dropout and BatchNorm batch statistics.
    model.train()
    seen = 0
    hits = 0.0
    for batch_idx, (images, targets) in enumerate(trainloader):
        images, targets = images.to(device), targets.to(device)
        # Reset gradients left over from the previous step.
        optimizer.zero_grad()
        logits = model(images)
        # Multi-class objective: cross-entropy over the 10 digit classes.
        loss = F.cross_entropy(logits, targets)
        # argmax along dim=1 -> predicted class index per sample.
        guesses = logits.argmax(dim=1)
        seen += targets.size(0)
        hits += (guesses == targets).sum().item()
        # Backprop, then apply the parameter update.
        loss.backward()
        optimizer.step()
        if batch_idx % 1000 == 0:
            # loss.item() is the scalar value of the current batch loss.
            print("Train Epoch{} \t Loss: {:.6f}, accuracy: {:.6f}%".format(epoch, loss.item(), 100*(hits/seen)))
#测试
def test_runner(model, device, testloader):
#模型验证, 必须要写, 否则只要有输入数据, 即使不训练, 它也会改变权值
#因为调用eval()将不启用 BatchNormalization 和 Dropout, BatchNormalization和Dropout置为False
model.eval()
#统计模型正确率, 设置初始值
correct = 0.0
test_loss = 0.0
total = 0
#torch.no_grad将不会计算梯度, 也不会进行反向传播
with torch.no_grad():
for data, label in testloader:
data, label = data.to(device), label.to(device)
output = model(data)
test_loss += F.cross_entropy(output, label).item()
predict = output.argmax(dim=1)
#计算正确数量
total += label.size(0)
correct += (predict == label).sum().item()
#计算损失值
print("test_avarage_loss: {:.6f}, accuracy: {:.6f}%".format(test_loss/total, 100*(correct/total)))
#Driver: train for `epoch` epochs, evaluating after each one.
#Fix: the original loop variable was also named `epoch`, shadowing the
#global epoch-count constant it iterates over.
for current_epoch in range(1, epoch+1):
    print("start_time",time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
    train_runner(model, device, trainloader, optimizer, current_epoch)
    test_runner(model, device, testloader)
    print("end_time: ",time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())),'\n')
运行结果:
start_time 2020-10-17 13:33:27
Train Epoch1 Loss: 2.297104, accuracy: 9.375000%
test_avarage_loss: 0.005186, accuracy: 94.480000%
end_time: 2020-10-17 13:33:42start_time 2020-10-17 13:33:42
Train Epoch2 Loss: 0.090078, accuracy: 96.875000%
test_avarage_loss: 0.003072, accuracy: 96.810000%
end_time: 2020-10-17 13:33:55start_time 2020-10-17 13:33:55
Train Epoch3 Loss: 0.034272, accuracy: 100.000000%
test_avarage_loss: 0.002685, accuracy: 97.270000%
end_time: 2020-10-17 13:34:09start_time 2020-10-17 13:34:09
Train Epoch4 Loss: 0.116680, accuracy: 95.312500%
test_avarage_loss: 0.002360, accuracy: 97.580000%
end_time: 2020-10-17 13:34:22start_time 2020-10-17 13:34:22
Train Epoch5 Loss: 0.015962, accuracy: 100.000000%
test_avarage_loss: 0.002186, accuracy: 97.560000%
end_time: 2020-10-17 13:34:35
菜鸡一个,有问题欢迎大佬指出