pytorch入门实战笔记(一)——使用LeNet网络结构创建MNIST手写数字识别分类器

MNIST数据集是一个包含60000张训练图片,10000张测试图片的手写数字数据集,包含0~9十个类别的数字,且全部为单色图片。

图片来源于百度 

解决问题的整体思路:

  1. 下载并加载数据,并做出一定的预先处理
    # Data pre-processing pipelines.
    # NOTE(review): RandomHorizontalFlip mirrors the image left/right (it does
    # NOT rotate), and mirrored digits (e.g. 2, 3, 5) are no longer valid
    # handwritten digits -- consider dropping this augmentation for MNIST.
    pipline1 = transforms.Compose([
        # Randomly flip the image horizontally (data augmentation)
        transforms.RandomHorizontalFlip(),
        # Convert the PIL image to a Tensor in [0, 1]
        transforms.ToTensor(),
        # Normalize with the MNIST mean/std; one value per channel, so pass
        # 1-tuples (bare floats break older torchvision's Normalize)
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    pipline2 = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    # Download the datasets
    train_set = datasets.MNIST(root="./data", train=True, download=True, transform=pipline1)
    test_set = datasets.MNIST(root="./data", train=False, download=True, transform=pipline2)
    # Wrap them in DataLoaders; shuffle only the training data
    trainloader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
    testloader = torch.utils.data.DataLoader(test_set, batch_size=32, shuffle=False)

    shuffle 参数指定加载数据时是否打乱样本顺序:训练集通常需要打乱以提高泛化能力,测试集保持固定顺序即可。

    在旧版 torchvision 中,RandomHorizontalFlip() 等作用于 PIL 图像的变换必须定义在 ToTensor() 之前(新版本的变换大多也支持 Tensor 输入)。

  2. 搭建LeNet神经网络结构,并定义前向传播的过程
    class LeNet(nn.Module):
        """LeNet-style CNN for 1x28x28 MNIST digits, producing 10 class log-probabilities."""

        def __init__(self):
            super(LeNet, self).__init__()
            self.conv1 = nn.Conv2d(1, 6, 5)     # 1x28x28 -> 6x24x24
            self.relu = nn.ReLU()
            self.maxpool1 = nn.MaxPool2d(2, 2)  # -> 6x12x12
            self.conv2 = nn.Conv2d(6, 16, 5)    # -> 16x8x8
            self.maxpool2 = nn.MaxPool2d(2, 2)  # -> 16x4x4

            self.fc1 = nn.Linear(16*4*4, 120)
            self.fc2 = nn.Linear(120, 84)
            self.fc3 = nn.Linear(84, 10)

        def forward(self, x):
            x = self.conv1(x)
            x = self.relu(x)
            x = self.maxpool1(x)
            x = self.conv2(x)
            # Fix: LeNet applies a non-linearity after every conv layer; the
            # original omitted the activation after conv2.
            x = self.relu(x)
            x = self.maxpool2(x)
            # Flatten to (batch, 16*4*4) for the fully-connected head
            x = x.view(-1, 16*4*4)
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            # log-probabilities per class (each row's exp() sums to 1)
            output = F.log_softmax(x, dim=1)
            return output

    nn.Module继承父类,用以定义神经网络结构。

    神经网络结构:

    LeNet(
      (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
      (relu): ReLU()
      (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
      (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (fc1): Linear(in_features=256, out_features=120, bias=True)
      (fc2): Linear(in_features=120, out_features=84, bias=True)
      (fc3): Linear(in_features=84, out_features=10, bias=True)
    )

    dim=0表示二维中的行,dim=1在二维矩阵中表示列。

    softmax函数可以将输入映射为0-1之间的实数,并且归一化保证和为1,因此多分类的概率之和也刚好为1。

    而log_softmax则是对softmax取对数,相较softmax更稳定,降低溢出。

  3. 将定义好的网络结构搭载到gpu,并定义优化器
    # Build the model and move it to the selected device (GPU when available)
    model = LeNet().to(device)
    # Adam optimizer over all model parameters, using its default hyper-parameters
    optimizer = optim.Adam(model.parameters())

     

  4. 定义训练过程
    def train_runner(model, device, trainloader, optimizer, epoch):
        """Train `model` for one epoch over `trainloader`, logging running loss/accuracy."""
        # train() enables training-time behavior of layers such as
        # BatchNorm/Dropout (none in this LeNet, but good practice)
        model.train()
        total = 0
        correct =0.0
        # enumerate yields the batch index together with each batch
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            # Move the batch to the same device as the model
            inputs, labels = inputs.to(device), labels.to(device)
            # Reset gradients accumulated from the previous step
            optimizer.zero_grad()
            # Forward pass
            outputs = model(inputs)
            # Multi-class classification loss (cross-entropy); binary problems
            # would typically use a sigmoid + BCE loss instead
            loss = F.cross_entropy(outputs, labels)
            # argmax(dim=1) returns, per row, the column index of the highest score
            predict = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predict == labels).sum().item()
            # Backward pass
            loss.backward()
            # Parameter update
            optimizer.step()
            if i % 1000 == 0:
                # loss.item() is the scalar value of the current batch loss
                print("Train Epoch{} \t Loss: {:.6f}, accuracy: {:.6f}%".format(epoch, loss.item(), 100*(correct/total)))
    

     

  5. 定义测试过程
    def test_runner(model, device, testloader):
        """Evaluate `model` on `testloader`, printing average loss and accuracy."""
        # eval() disables training-only behavior (BatchNorm/Dropout); without it
        # such layers would keep changing state even though we are not training
        model.eval()
        correct = 0.0
        test_loss = 0.0
        total = 0
        # no_grad(): skip gradient tracking -- evaluation never back-propagates
        with torch.no_grad():
            for data, label in testloader:
                data, label = data.to(device), label.to(device)
                output = model(data)
                # Fix: sum the per-sample losses; the original accumulated
                # per-batch *means* and then divided by the sample count,
                # which under-reports the average loss by ~batch_size
                test_loss += F.cross_entropy(output, label, reduction='sum').item()
                predict = output.argmax(dim=1)
                total += label.size(0)
                correct += (predict == label).sum().item()
            # Per-sample average loss and overall accuracy
            print("test_avarage_loss: {:.6f}, accuracy: {:.6f}%".format(test_loss/total, 100*(correct/total)))

     

  6. 运行程序

完整代码:

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import time 

# Use the GPU when available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Total number of training epochs
epoch = 5

#对图像做预先处理
# Data pre-processing pipelines.
# NOTE(review): RandomHorizontalFlip mirrors the image left/right (it does
# NOT rotate), and mirrored digits (e.g. 2, 3, 5) are no longer valid
# handwritten digits -- consider dropping this augmentation for MNIST.
pipline1 = transforms.Compose([
    # Randomly flip the image horizontally (data augmentation)
    transforms.RandomHorizontalFlip(),
    # Convert the PIL image to a Tensor in [0, 1]
    transforms.ToTensor(),
    # Normalize with the MNIST mean/std; one value per channel, so pass
    # 1-tuples (bare floats break older torchvision's Normalize)
    transforms.Normalize((0.1307,), (0.3081,))
])
pipline2 = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
# Download the datasets
train_set = datasets.MNIST(root="./data", train=True, download=True, transform=pipline1)
test_set = datasets.MNIST(root="./data", train=False, download=True, transform=pipline2)
# Wrap them in DataLoaders; shuffle only the training data
trainloader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
testloader = torch.utils.data.DataLoader(test_set, batch_size=32, shuffle=False)
#构建LeNet网络模型
class LeNet(nn.Module):
    """LeNet-style CNN for 1x28x28 MNIST digits, producing 10 class log-probabilities."""

    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)     # 1x28x28 -> 6x24x24
        self.relu = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(2, 2)  # -> 6x12x12
        self.conv2 = nn.Conv2d(6, 16, 5)    # -> 16x8x8
        self.maxpool2 = nn.MaxPool2d(2, 2)  # -> 16x4x4

        self.fc1 = nn.Linear(16*4*4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        # Fix: LeNet applies a non-linearity after every conv layer; the
        # original omitted the activation after conv2.
        x = self.relu(x)
        x = self.maxpool2(x)
        # Flatten to (batch, 16*4*4) for the fully-connected head
        x = x.view(-1, 16*4*4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        # log-probabilities per class (each row's exp() sums to 1)
        output = F.log_softmax(x, dim=1)
        return output

# Build the model and move it to the selected device (GPU when available)
model = LeNet().to(device)
# Adam optimizer over all model parameters, using its default hyper-parameters
optimizer = optim.Adam(model.parameters())

#训练
def train_runner(model, device, trainloader, optimizer, epoch):
    """Train `model` for one epoch over `trainloader`, logging running loss/accuracy."""
    # Switch to training mode so layers like BatchNorm/Dropout (if present)
    # use their training-time behavior.
    model.train()
    seen = 0
    hits = 0.0
    for batch_idx, (images, targets) in enumerate(trainloader):
        # Keep the batch on the same device as the model
        images, targets = images.to(device), targets.to(device)
        # Clear gradients left over from the previous step
        optimizer.zero_grad()
        # Forward pass; cross-entropy for multi-class classification
        logits = model(images)
        loss = F.cross_entropy(logits, targets)
        # Running accuracy: argmax(dim=1) picks each row's best-scoring class
        hits += (logits.argmax(dim=1) == targets).sum().item()
        seen += targets.size(0)
        # Backward pass and parameter update
        loss.backward()
        optimizer.step()
        if batch_idx % 1000 == 0:
            # loss.item() is the scalar value of the current batch loss
            print("Train Epoch{} \t Loss: {:.6f}, accuracy: {:.6f}%".format(epoch, loss.item(), 100*(hits/seen)))

#测试
def test_runner(model, device, testloader):
    #模型验证, 必须要写, 否则只要有输入数据, 即使不训练, 它也会改变权值
    #因为调用eval()将不启用 BatchNormalization 和 Dropout, BatchNormalization和Dropout置为False
    model.eval()
    #统计模型正确率, 设置初始值
    correct = 0.0
    test_loss = 0.0
    total = 0
    #torch.no_grad将不会计算梯度, 也不会进行反向传播
    with torch.no_grad():
        for data, label in testloader:
            data, label = data.to(device), label.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, label).item()
            predict = output.argmax(dim=1)
            #计算正确数量
            total += label.size(0)
            correct += (predict == label).sum().item()
        #计算损失值
        print("test_avarage_loss: {:.6f}, accuracy: {:.6f}%".format(test_loss/total, 100*(correct/total)))
# Driver loop: train then evaluate once per epoch, printing wall-clock timestamps.
# NOTE(review): the loop variable shadows the module-level `epoch` count;
# range(1, epoch+1) reads the constant once before the loop starts, so this works.
for epoch in range(1, epoch+1):
    print("start_time",time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
    train_runner(model, device, trainloader, optimizer, epoch)
    test_runner(model, device, testloader)
    print("end_time: ",time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())),'\n')

运行结果:

start_time 2020-10-17 13:33:27
Train Epoch1      Loss: 2.297104, accuracy: 9.375000%
test_avarage_loss: 0.005186, accuracy: 94.480000%
end_time:  2020-10-17 13:33:42 

start_time 2020-10-17 13:33:42
Train Epoch2      Loss: 0.090078, accuracy: 96.875000%
test_avarage_loss: 0.003072, accuracy: 96.810000%
end_time:  2020-10-17 13:33:55 

start_time 2020-10-17 13:33:55
Train Epoch3      Loss: 0.034272, accuracy: 100.000000%
test_avarage_loss: 0.002685, accuracy: 97.270000%
end_time:  2020-10-17 13:34:09 

start_time 2020-10-17 13:34:09
Train Epoch4      Loss: 0.116680, accuracy: 95.312500%
test_avarage_loss: 0.002360, accuracy: 97.580000%
end_time:  2020-10-17 13:34:22 

start_time 2020-10-17 13:34:22
Train Epoch5      Loss: 0.015962, accuracy: 100.000000%
test_avarage_loss: 0.002186, accuracy: 97.560000%
end_time:  2020-10-17 13:34:35

菜鸡一个,有问题欢迎大佬指出 

  • 5
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 4
    评论
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值