Python深度学习实例--CIFAR-10图像数据分类

1. CIFAR-10图像数据集

1.1 数据集介绍

CIFAR-10由60000张32*32的RGB图像组成,共有10类,分别是,飞机(0)、汽车(1)、鸟(2)、猫(3)、鹿(4)、狗(5)、青蛙(6)、马(7)、船(8)和卡车(9)。下图展示了CIFAR-10图像数据集。
在这里插入图片描述

1.2 下载数据集

%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import torch
from torchvision import datasets
data_path = '../data-unversioned/p1ch7/'
cifar10 = datasets.CIFAR10(data_path, train=True, download=True) # 第1个参数是数据的下载位置;第2个参数指定对训练集和验证集感兴趣;第3个参数表示如果在第1个参数指定位置找不到依旧下载
cifar10_val = datasets.CIFAR10(data_path, train=False, download=True) # 
class_names = ['airplane','automobile','bird','cat','deer',
               'dog','frog','horse','ship','truck']

fig = plt.figure(figsize=(8,3))
num_classes = 10
for i in range(num_classes):
    ax = fig.add_subplot(2, 5, 1 + i, xticks=[], yticks=[])
    ax.set_title(class_names[i])
    img = next(img for img, label in cifar10 if label == i)
    plt.imshow(img)
plt.show()

输出:
在这里插入图片描述
torch.utils.data.Dataset是具有2种方法的对象:__len__()和__getitem__(),前者返回数据集中的项数,后者返回由样本和与之对应的标签组成的项。

len(cifar10)  # the dataset implements __len__(): number of training samples
img, label = cifar10[99]  # and __getitem__(): returns a (PIL image, integer label) pair
img, label, class_names[label]

分别输出:

50000
(<PIL.Image.Image image mode=RGB size=32x32>, 1, 'automobile')

1.3 Dataset变换

引入torchvision.transforms模块,将PIL图像变换为PyTorch张量。

from torchvision import transforms
to_tensor = transforms.ToTensor()
img_t = to_tensor(img)  # convert the PIL image into a C x H x W float tensor
img_t.shape

输出:

torch.Size([3, 32, 32])

我们可以将变换直接作为参数传递给datasets.CIFAR10

# Pass the transform to the dataset constructor so every item comes back
# as a tensor instead of a PIL image (download=False: data already on disk).
tensor_cifar10 = datasets.CIFAR10(data_path, train=True, download=False,
                          transform=transforms.ToTensor())

此时访问数据集的元素将返回一个张量,而不是PIL图像

img_t, _ = tensor_cifar10[99]
type(img_t)
img_t.shape, img_t.dtype
img_t.min(), img_t.max()  # ToTensor rescales the 0-255 PIL values to 32-bit floats in [0, 1]

分别输出:

torch.Tensor
(torch.Size([3, 32, 32]), torch.float32)
(tensor(0.), tensor(1.))
plt.imshow(img_t.permute(1, 2, 0))  # reorder C x H x W -> H x W x C, the layout Matplotlib expects
plt.show()

输出:
在这里插入图片描述

1.4 数据归一化

通过计算每个通道的平均值和标准差使每个通道的均值为0、标准差为1。

# Stack all training images along an extra 4th axis -> shape (3, 32, 32, 50000).
# NOTE(review): this materializes the entire training set in memory at once.
imgs = torch.stack([img_t for img_t, _ in tensor_cifar10], dim=3) 
imgs.shape
imgs.view(3, -1).mean(dim=1)  # per-channel mean over every pixel of every image
imgs.view(3, -1).std(dim=1)   # per-channel standard deviation

分别输出:

torch.Size([3, 32, 32, 50000])
tensor([0.4914, 0.4822, 0.4465])
tensor([0.2470, 0.2435, 0.2616])
# Normalize each channel with the statistics computed above; the constants
# now match the printed mean tensor([0.4914, 0.4822, 0.4465]) exactly
# (the original snippet used slightly different values 0.4915/0.4823/0.4468).
transformed_cifar10 = datasets.CIFAR10(
    data_path, train=True, download=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2470, 0.2435, 0.2616))
    ]))
# The validation split is normalized with the *training* statistics,
# which is the standard practice.
transformed_cifar10_val = datasets.CIFAR10(
    data_path, train=False, download=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2470, 0.2435, 0.2616))
    ]))

这时,从数据集绘制的图像不能为我们提供实际图像的真实表示

img_t, _ = transformed_cifar10[99]
plt.imshow(img_t.permute(1, 2, 0))  # normalized values fall outside [0, 1], so colors look unnatural
plt.show()

归一化后得到的红色汽车图像如下图所示。
在这里插入图片描述

2. 区分CIFAR-10图像数据集中的鸟和飞机

2.1 构建数据集

label_map = {0: 0, 2: 1}  # remap airplane (0) -> 0 and bird (2) -> 1
class_names = ['airplane', 'bird']
# Build the two-class subsets from the *transformed* datasets so each
# sample is a normalized tensor.  The original used the raw `cifar10`
# (PIL images), on which the later training code's img.view(-1) /
# imgs.view(imgs.shape[0], -1) calls would fail.
cifar2 = [(img, label_map[label])
          for img, label in transformed_cifar10
          if label in [0, 2]]
cifar2_val = [(img, label_map[label])
              for img, label in transformed_cifar10_val
              if label in [0, 2]]

2.2 构建一个全连接模型

import torch
import torch.nn as nn
import torch.optim as optim

# Fully connected classifier: flattened 3*32*32 image -> 2 log-probabilities.
model = nn.Sequential(
            nn.Linear(3072, 512),
            nn.Tanh(),
            nn.Linear(512, 2),
            nn.LogSoftmax(dim=1))

learning_rate = 1e-2

optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# NLLLoss pairs with the LogSoftmax output layer.
loss_fn = nn.NLLLoss()

n_epochs = 100

for epoch in range(n_epochs):
    # One SGD step per individual sample (no mini-batching yet).
    for sample, target in cifar2:
        prediction = model(sample.view(-1).unsqueeze(0))
        loss = loss_fn(prediction, torch.tensor([target]))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print("Epoch: %d, Loss: %f" % (epoch, float(loss)))

进行小批量训练

import torch
import torch.nn as nn
import torch.optim as optim

# Mini-batch training: the DataLoader shuffles and batches the samples.
# (The original snippet constructed this same DataLoader twice; the
# redundant first construction has been removed.)
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64,
                                           shuffle=True)

# Same fully connected classifier as before.
model = nn.Sequential(
            nn.Linear(3072, 512),
            nn.Tanh(),
            nn.Linear(512, 2),
            nn.LogSoftmax(dim=1))

learning_rate = 1e-2

optimizer = optim.SGD(model.parameters(), lr=learning_rate)

loss_fn = nn.NLLLoss()

n_epochs = 100

for epoch in range(n_epochs):
    for imgs, labels in train_loader:
        # Flatten each image in the batch: (B, 3, 32, 32) -> (B, 3072).
        outputs = model(imgs.view(imgs.shape[0], -1))
        loss = loss_fn(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Note: this prints the loss of the *last* batch of the epoch only.
    print("Epoch: %d, Loss: %f" % (epoch, float(loss)))

在训练集和验证集上计算分类的准确性

# Accuracy on the training set (no shuffling needed for evaluation).
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64,
                                           shuffle=False)

correct = 0
total = 0

with torch.no_grad():
    for batch_imgs, batch_labels in train_loader:
        logits = model(batch_imgs.view(batch_imgs.shape[0], -1))
        predictions = logits.argmax(dim=1)
        total += batch_labels.shape[0]
        correct += int((predictions == batch_labels).sum())

print("Accuracy: %f" % (correct / total))

输出:

Accuracy: 0.997700
# Accuracy on the held-out validation set.
val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=64,
                                         shuffle=False)

correct = 0
total = 0

with torch.no_grad():
    for batch_imgs, batch_labels in val_loader:
        logits = model(batch_imgs.view(batch_imgs.shape[0], -1))
        predictions = logits.argmax(dim=1)
        total += batch_labels.shape[0]
        correct += int((predictions == batch_labels).sum())

print("Accuracy: %f" % (correct / total))

输出:

# NOTE(review): this snippet is a verbatim duplicate of the
# validation-accuracy block above (likely a copy/paste artifact in the
# article; the "输出:" heading before it has no output attached).
val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=64,  # accuracy on the validation set
                                         shuffle=False)

correct = 0
total = 0

with torch.no_grad():
    for imgs, labels in val_loader:
        outputs = model(imgs.view(imgs.shape[0], -1))
        _, predicted = torch.max(outputs, dim=1)
        total += labels.shape[0]
        correct += int((predicted == labels).sum())
        
print("Accuracy: %f" % (correct / total))

输出:

Accuracy: 0.821000

用nn.CrossEntropyLoss()代替nn.NLLLoss(),更改全连接模型。

import torch
import torch.nn as nn
import torch.optim as optim

train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64,
                                           shuffle=True)

# Deeper fully connected model; no LogSoftmax at the end because
# nn.CrossEntropyLoss applies log-softmax internally to the raw logits.
model = nn.Sequential(
            nn.Linear(3072, 1024),
            nn.Tanh(),
            nn.Linear(1024, 512),
            nn.Tanh(),
            nn.Linear(512, 128),
            nn.Tanh(),
            nn.Linear(128, 2))

learning_rate = 1e-2

optimizer = optim.SGD(model.parameters(), lr=learning_rate)

loss_fn = nn.CrossEntropyLoss()

n_epochs = 100

for epoch in range(n_epochs):
    for imgs, labels in train_loader:
        outputs = model(imgs.view(imgs.shape[0], -1))
        loss = loss_fn(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Fixed: the original was missing the closing parenthesis here
    # (a SyntaxError that would prevent the snippet from running).
    print("Epoch: %d, Loss: %f" % (epoch, float(loss)))

PyTorch的nn.Module的parameters()方法提供了一种快速确定模型有多少个参数的方法,可以对每个参数张量调用numel(),把各分量的元素个数加起来得到参数元素的总数。

# Count the trainable parameters: numel() gives the element count of each
# parameter tensor.  (`if p.requires_grad` replaces the non-idiomatic
# `== True` comparison.)
numel_list = [p.numel() for p in model.parameters() if p.requires_grad]
sum(numel_list), numel_list

输出:

(3737474, [3145728, 1024, 524288, 512, 65536, 128, 256, 2])

2.3 构建一个卷积模型

由于参数量巨大,利用全连接层进行图像分类是不现实的,接下来将会使用卷积层来构建分类模型。

class Net(nn.Module):
    """Small CNN for 2-class CIFAR: two conv/tanh/maxpool stages, then an MLP head.

    Input: (B, 3, 32, 32) image batch.  Output: (B, 2) raw class scores.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)   # 32x32 kept by padding=1
        self.act1 = nn.Tanh()
        self.pool1 = nn.MaxPool2d(2)                              # 32x32 -> 16x16
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.act2 = nn.Tanh()
        self.pool2 = nn.MaxPool2d(2)                              # 16x16 -> 8x8
        self.fc1 = nn.Linear(8 * 8 * 8, 32)                       # 8 channels * 8 * 8 spatial
        self.act3 = nn.Tanh()
        self.fc2 = nn.Linear(32, 2)

    def forward(self, x):
        out = self.pool1(self.act1(self.conv1(x)))
        out = self.pool2(self.act2(self.conv2(out)))
        # Flatten per sample.  Keying on the batch dimension (instead of the
        # original view(-1, 8 * 8 * 8)) raises a clear error on any spatial
        # size mismatch rather than silently reshaping across samples.
        out = out.view(out.size(0), -1)
        out = self.act3(self.fc1(out))
        out = self.fc2(out)
        return out
model = Net()

# Per-tensor parameter counts and their total: 18090, far fewer than the
# ~3.7M parameters of the fully connected model above.
numel_list = [p.numel() for p in model.parameters()]
sum(numel_list), numel_list

输出:

(18090, [432, 16, 1152, 8, 16384, 32, 64, 2])

首先,具体查看输入图像经过网络中的一个二维卷积产生一个二维图像,它的像素是输入图像邻域的加权和。

img, _ = cifar2[0]
conv = nn.Conv2d(3, 16, kernel_size=3, padding=1)  # 3 input channels -> 16 output channels, size preserved
output = conv(img.unsqueeze(0))  # unsqueeze(0) adds the batch dimension the layer expects
img.unsqueeze(0).shape, output.shape

输出:

(torch.Size([1, 3, 32, 32]), torch.Size([1, 16, 32, 32]))

画出输入图像和输出图像:
在这里插入图片描述
然后,查看一下经过最大池化层图像形状的变换。

pool = nn.MaxPool2d(2)  # 2x2 max pooling halves each spatial dimension
output = pool(img.unsqueeze(0))
img.unsqueeze(0).shape, output.shape

输出:

(torch.Size([1, 3, 32, 32]), torch.Size([1, 3, 16, 16]))

最后,查看一下图像经过整个网络后的输出。

model = Net()
model(img.unsqueeze(0))  # untrained network: two raw class scores for the single input

输出:

tensor([[-0.0157,  0.1143]], grad_fn=<AddmmBackward>)

换一种定义网络的形式,利用函数式API来定义网络。

import torch.nn.functional as F

class Net(nn.Module):
    """Same architecture as the previous Net, using the functional API for
    the parameter-less operations (tanh and max-pooling), so only the
    layers that hold parameters are registered as submodules."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(8 * 8 * 8, 32)
        self.fc2 = nn.Linear(32, 2)

    def forward(self, x):
        out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
        out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
        # Flatten per sample; batch-keyed view errors out on any spatial
        # size mismatch instead of silently reshaping across samples.
        out = out.view(out.size(0), -1)
        out = torch.tanh(self.fc1(out))
        out = self.fc2(out)
        return out

开始在GPU训练

# Pick the GPU when available, otherwise fall back to the CPU.
device = (torch.device('cuda') if torch.cuda.is_available()
          else torch.device('cpu'))
print(f"Training on device {device}.")



import datetime

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader, val_loader):
    """Train `model` for `n_epochs`, printing the average training and
    validation loss at epoch 0 and every 10th epoch thereafter.

    Fixes vs the original: the validation loop unpacked `label_val` but
    used `labels_val` (NameError on the first validation batch); it is now
    wrapped in torch.no_grad() so no graph is built, and the stray
    optimizer.zero_grad() between the two loops is removed.
    """
    for epoch in range(n_epochs):
        loss_train = 0.0
        loss_val = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)
            labels = labels.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_train += loss.item()  # .item() detaches the scalar from the graph
        with torch.no_grad():
            for imgs_val, labels_val in val_loader:
                imgs_val = imgs_val.to(device=device)
                labels_val = labels_val.to(device=device)
                outputs_val = model(imgs_val)
                loss_batch_val = loss_fn(outputs_val, labels_val)
                loss_val += loss_batch_val.item()
        if epoch == 0 or (epoch + 1) % 10 == 0:
            print('{} Epoch {}, Training loss {}, Val loss {}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader),
                loss_val / len(val_loader)))
                
# Assemble loaders, model, optimizer and loss, then launch training.
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64,
                                           shuffle=True)
val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=64,
                                         shuffle=False)

model = Net().to(device=device)  # move parameters to the selected device
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs=100,
    optimizer=optimizer,
    model=model,
    loss_fn=loss_fn,
    train_loader=train_loader,
    val_loader=val_loader,
)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值