深度学习入门基础教程(二) CNN做CIFAR10数据集图像分类 pytorch版代码

一.  原理解读

分享一篇我觉得非常适合新手理解CNN的博文 https://blog.csdn.net/XYlittlework/article/details/79903225

CIFAR10数据集(训练集5万张,测试集1万张,每张彩图32*32*3,10类对应标签值0-9)如下:

二.  pytorch版完整代码复现

1. 全代码名称展示

可见,跑了几十轮后,在测试集上的准确率超过90% 

2. 代码

import torch
import torch.nn as nn
from torchvision.datasets import CIFAR10
from torchvision import transforms
from torch.utils.data import DataLoader,Dataset
from torch.optim import Adam
import os
import shutil

"""
torch.nn是专门为神经网络设计的模块化接口。nn构建于autograd之上,可以用来定义和运行神经网络。
nn.Module是nn中十分重要的类,包含网络各层的定义及forward方法。

定义自已的网络:
    需要继承nn.Module类,并实现forward方法。
    一般把网络中具有可学习参数的层放在构造函数__init__()中,
    不具有可学习参数的层(如ReLU)可放在构造函数中,也可不放在构造函数中(而在forward中使用nn.functional来代替)。    
    只要在nn.Module的子类中定义了forward函数,backward函数就会被自动实现(利用Autograd)。

注:Pytorch基于nn.Module构建的模型中,只支持mini-batch的Variable输入方式,
    比如,只有一张输入图片,也需要变成 N x C x H x W 的形式:    
    input_image = torch.FloatTensor(1, 28, 28)   
    input_image = input_image.unsqueeze(0)   # 1 x 1 x 28 x 28
"""
class Unit(nn.Module):
    """Conv(3x3, padding=1) -> BatchNorm -> ReLU building block.

    Spatial size is preserved (stride 1, padding 1); only the channel
    count changes from ``inc`` to ``ouc``.  Subclassing ``nn.Module`` is
    what registers the parameters and enables autograd.
    """

    def __init__(self, inc, ouc):
        super(Unit, self).__init__()
        conv = nn.Conv2d(inc, ouc, kernel_size=3, padding=1)
        norm = nn.BatchNorm2d(ouc)
        act = nn.ReLU()
        # Keep the positional Sequential under the same attribute name so
        # saved state_dict keys (unit_net.0.*, unit_net.1.*) stay valid.
        self.unit_net = nn.Sequential(conv, norm, act)

    def forward(self, x):
        out = self.unit_net(x)
        return out

class Net(nn.Module):
    """CNN for 32x32 CIFAR-10 images: 14 Unit conv blocks in four stages,
    downsampled by MaxPool between stages, global-average-pooled to a
    128-dim vector, then a single linear classifier over 10 classes.
    """

    def __init__(self):
        super(Net, self).__init__()
        stages = [
            # stage 1: 32x32 spatial
            Unit(3, 32), Unit(32, 32), Unit(32, 32),
            nn.MaxPool2d(2),  # -> 16x16
            # stage 2
            Unit(32, 64), Unit(64, 64), Unit(64, 64), Unit(64, 64),
            nn.MaxPool2d(2),  # -> 8x8
            # stage 3
            Unit(64, 128), Unit(128, 128), Unit(128, 128), Unit(128, 128),
            nn.MaxPool2d(2),  # -> 4x4
            # stage 4
            Unit(128, 128), Unit(128, 128), Unit(128, 128),
            nn.AvgPool2d(4),  # -> 1x1 (global average pool)
        ]
        # Positional Sequential keeps the original state_dict keys intact.
        self.net = nn.Sequential(*stages)
        self.fc = nn.Linear(128, 10)

    def forward(self, x):
        features = self.net(x)          # N x 128 x 1 x 1
        flat = features.view(-1, 128)   # N x 128
        return self.fc(flat)            # N x 10 raw logits

'训练集转换'
# Training transforms: light augmentation (random flip + padded crop),
# then map each RGB channel from [0, 1] to [-1, 1].
train_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),# random horizontal flip (augmentation)
    transforms.RandomCrop(32,padding=4),# pad 4px on each side, crop back to 32x32
    transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))])
'train=True加载训练集'
# train=True loads the 50k-image training split (downloads on first run).
train_set = CIFAR10('./data/',train=True,transform=train_transforms,download=True)
train_dataloader = DataLoader(train_set,batch_size=512,shuffle=True)

'测试集转换'
# Test transforms: no augmentation, only tensor conversion + normalization.
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))])
'train=False加载测试集'
# train=False loads the 10k-image test split.
test_set = CIFAR10('./data/',train=False,transform=test_transforms,download=True)
test_dataloader = DataLoader(test_set,batch_size=512,shuffle=False)

# Checkpoint paths: new weights go to the temp file first; the temp file is
# copied over param_path on the next save (see train()), so a crash mid-save
# cannot corrupt the primary checkpoint.
param_path = r'./param/cifar10_cnn.pkl'
tmp_param_path = r'./param/cifar10_cnn_temp.pkl'

CUDA = torch.cuda.is_available()  # use the GPU when one is available
# print(CUDA)
module = Net()
if CUDA:
    module.cuda()
# Adam with a small L2 penalty; the lr here must match the base lr assumed
# by adjust_lr_rate() below.
optimizer = Adam(module.parameters(),lr=0.001,weight_decay=0.0001)
loss_f = nn.CrossEntropyLoss()# cross-entropy: the standard classification loss

'创建一个学习率调整函数,每30个周期将学习率除以10'
def adjust_lr_rate(epoch, base_lr=0.001):
    """Step-decay schedule: divide the learning rate by 10 every 30 epochs.

    Replaces the original six-branch ``elif`` ladder with a closed form.
    Boundaries are identical: epochs 0-30 use base_lr, 31-60 use base_lr/10,
    ..., and everything past 180 is capped at base_lr/1e6.

    Args:
        epoch: current epoch number (0-based).
        base_lr: initial learning rate; defaults to the 0.001 the optimizer
            was created with, so existing callers are unaffected.

    Side effect: writes the new lr into every param_group of the global
    ``optimizer``.
    """
    # decade = how many completed 30-epoch periods precede this epoch,
    # clamped to [0, 6] to reproduce the original cap at /1e6.
    decade = min(max(epoch - 1, 0) // 30, 6)
    lr = base_lr / (10 ** decade)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

def test():
    """Evaluate the global ``module`` on the full test set.

    Returns:
        float: top-1 accuracy in [0, 1] over the whole test_dataloader.

    Fixes vs. the original: runs under torch.no_grad() so no autograd graph
    is built during evaluation, and divides by the actual number of samples
    seen instead of a hard-coded 10000.
    """
    correct = 0
    total = 0
    module.eval()  # BatchNorm switches to running statistics
    with torch.no_grad():  # no gradients needed for evaluation
        for imgs, labels in test_dataloader:
            if CUDA:
                imgs = imgs.cuda()
                labels = labels.cuda()
            outs = module(imgs)
            # argmax over the 10 logits gives the predicted class index
            _, prediction = torch.max(outs, 1)
            correct += torch.sum(prediction == labels).cpu().item()
            total += labels.size(0)
    return correct / total

def train(num_epoch):
    """Train the global ``module`` for ``num_epoch`` epochs.

    Each epoch: one pass over train_dataloader with Adam updates, then a
    full test-set evaluation; whenever test accuracy improves on the best
    seen so far, the weights are checkpointed via the two-stage
    tmp-then-promote scheme (protects param_path from a crash mid-save).

    BUG FIX: ``best_acc`` was previously reset to 0 *inside* the epoch
    loop, so every epoch trivially beat it and the checkpoint was
    overwritten even when accuracy regressed.  It is now tracked across
    epochs, so only genuinely better models are saved.
    """
    # Resume from the last promoted checkpoint if one exists.
    if os.path.exists(param_path):
        module.load_state_dict(torch.load(param_path))

    best_acc = 0.0                # best test accuracy seen across epochs
    n_train = len(train_set)      # avoid the hard-coded 50000
    for epoch in range(num_epoch):
        train_loss = 0.0
        train_acc = 0
        module.train()            # BatchNorm uses batch statistics
        for imgs, labels in train_dataloader:
            if CUDA:
                imgs = imgs.cuda()
                labels = labels.cuda()
            outs = module(imgs)
            loss = loss_f(outs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # loss is a per-sample mean; re-weight by batch size so the
            # epoch average is exact even for the smaller final batch.
            train_loss += loss.item() * imgs.size(0)
            # class prediction = index of the max logit (0-9)
            _, prediction = torch.max(outs, 1)
            train_acc += torch.sum(prediction == labels).cpu().item()

        adjust_lr_rate(epoch)
        train_loss = train_loss / n_train
        train_acc = train_acc / n_train

        # Evaluate once per epoch; checkpoint only on improvement.
        test_acc = test()
        if test_acc > best_acc:
            best_acc = test_acc
            # Promote the previous temp file first so a crash while writing
            # the new temp file cannot corrupt param_path.
            if os.path.exists(tmp_param_path):
                shutil.copyfile(tmp_param_path, param_path)
            torch.save(module.state_dict(), tmp_param_path)
        print('Epoch:', epoch, 'Train_Loss:', train_loss, 'Train_Acc:', train_acc, 'Test_Acc:', test_acc)

# Kick off training for 100 epochs. NOTE(review): this runs at import time —
# consider wrapping in an `if __name__ == '__main__':` guard.
train(100)

3. 扩展 

用如上训练好的模型做推断(只写出了完整的引用方法,并没输出最终分类结果)

import torch
import torch.nn as nn
from torchvision import transforms
from torchvision.models import squeezenet1_1
from PIL import Image

class Unit(nn.Module):
    """3x3 conv -> BatchNorm -> ReLU block; height and width are unchanged."""

    def __init__(self, inc, ouc):
        super(Unit, self).__init__()
        stages = [
            nn.Conv2d(inc, ouc, kernel_size=3, padding=1),
            nn.BatchNorm2d(ouc),
            nn.ReLU(),
        ]
        # Same attribute name and positional order as the training script,
        # so the saved checkpoint's state_dict keys match.
        self.unit_net = nn.Sequential(*stages)

    def forward(self, x):
        return self.unit_net(x)

class Net(nn.Module):
    """CIFAR-10 classifier identical in structure to the training script's
    Net: four conv stages of Unit blocks separated by 2x2 max-pools, a
    global 4x4 average pool down to 128 features, and a 10-way linear head.
    The architecture (and attribute names) must match exactly so the saved
    checkpoint loads.
    """

    def __init__(self):
        super(Net, self).__init__()
        pipeline = [
            Unit(3, 32), Unit(32, 32), Unit(32, 32),       # 32x32
            nn.MaxPool2d(2),                               # -> 16x16
            Unit(32, 64), Unit(64, 64), Unit(64, 64), Unit(64, 64),
            nn.MaxPool2d(2),                               # -> 8x8
            Unit(64, 128), Unit(128, 128), Unit(128, 128), Unit(128, 128),
            nn.MaxPool2d(2),                               # -> 4x4
            Unit(128, 128), Unit(128, 128), Unit(128, 128),
            nn.AvgPool2d(4),                               # -> 1x1
        ]
        self.net = nn.Sequential(*pipeline)
        self.fc = nn.Linear(128, 10)

    def forward(self, x):
        feats = self.net(x)
        feats = feats.view(-1, 128)  # flatten N x 128 x 1 x 1 to N x 128
        return self.fc(feats)

CUDA = torch.cuda.is_available()  # use the GPU when one is available
# module = squeezenet1_1(pretrained=True)# a torchvision built-in model could be used instead
module = Net()# instantiate our own architecture
# Load the weights saved by the training script (raises if the file is missing
# or the architecture no longer matches).
module.load_state_dict(torch.load(r'./param/cifar10_cnn.pkl'))
module.eval()  # inference mode: BatchNorm uses running statistics
if CUDA:
    module.cuda()

def predict_img(img_path):
    """Classify one image file with the global ``module``.

    Args:
        img_path: path to an image readable by PIL.

    Returns:
        A 1-element LongTensor holding the predicted class index (0-9).

    Fixes vs. the original: ``Resize(32)`` only scales the *shorter* side
    to 32, so non-square photos were not actually 32x32 — the model needs
    the exact size, hence ``Resize((32, 32))``.  Also converts to RGB so
    grayscale/RGBA inputs don't break the 3-channel Normalize, and runs
    under no_grad since no gradients are needed for inference.
    """
    img = Image.open(img_path).convert('RGB')  # force 3 channels
    img2data = transforms.Compose([
        transforms.Resize((32, 32)),  # exact 32x32, matching CIFAR-10 training
        transforms.ToTensor(),
        transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])
    data = img2data(img)
    # PyTorch models expect batched input: add the N dim -> 1 x C x H x W
    data = data.unsqueeze(0)
    if CUDA:
        data = data.cuda()
    with torch.no_grad():
        out = module(data)
    _, index = torch.max(out, 1)
    return index

if __name__ == '__main__':
    # Example: classify one image from disk and print the predicted class
    # index tensor (0-9). Adjust the path to a local image before running.
    index = predict_img(r'C:\Users\admin\Desktop/bird.jpg')
    print(index)

  • 10
    点赞
  • 55
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
好的,这是使用PyTorch实现的CNN模型对CIFAR-10数据集进行图像分类的代码:

``` python
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# 定义超参数
num_epochs = 10
num_classes = 10
batch_size = 100
learning_rate = 0.001

# 加载CIFAR-10数据集
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transforms.ToTensor())

# 创建数据加载器
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# 定义CNN模型
class ConvNet(nn.Module):
    def __init__(self, num_classes):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.fc1 = nn.Linear(4*4*128, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

# 实例化CNN模型
model = ConvNet(num_classes)

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 训练模型
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # 前向传播
        outputs = model(images)
        loss = criterion(outputs, labels)

        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # 输出训练信息
        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))

# 测试模型
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

# 保存模型
torch.save(model.state_dict(), 'cnn.ckpt')
```

这是一个三层卷积神经网络,使用Adam优化器和CrossEntropyLoss损失函数进行训练,并在测试集上进行测试,最后保存模型参数。
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值