Python深度学习实例--CIFAR-10图像数据分类
1. CIFAR-10图像数据集
1.1 数据集介绍
CIFAR-10由60000张32*32的RGB图像组成,共有10类,分别是:飞机(0)、汽车(1)、鸟(2)、猫(3)、鹿(4)、狗(5)、青蛙(6)、马(7)、船(8)和卡车(9)。下图展示了CIFAR-10图像数据集。
1.2 下载数据集
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import torch
from torchvision import datasets
data_path = '../data-unversioned/p1ch7/'
cifar10 = datasets.CIFAR10(data_path, train=True, download=True) # 第1个参数是数据的下载位置;第2个参数指定对训练集和验证集感兴趣;第3个参数表示如果在第1个参数指定位置找不到依旧下载
cifar10_val = datasets.CIFAR10(data_path, train=False, download=True) #
class_names = ['airplane','automobile','bird','cat','deer',
'dog','frog','horse','ship','truck']
fig = plt.figure(figsize=(8,3))
num_classes = 10
for i in range(num_classes):
ax = fig.add_subplot(2, 5, 1 + i, xticks=[], yticks=[])
ax.set_title(class_names[i])
img = next(img for img, label in cifar10 if label == i)
plt.imshow(img)
plt.show()
输出:
torch.utils.data.Dataset是具有2种函数的对象:__len__()和__getitem__(),前者返回数据集中的项数,后者返回由样本和与之对应的标签组成的项。
# A Dataset supports __len__ (number of samples) and __getitem__
# (returns a (sample, label) pair), so len() and indexing both work.
len(cifar10)
img, label = cifar10[99] # the dataset implements __getitem__, enabling indexing
img, label, class_names[label]
分别输出:
50000
(<PIL.Image.Image image mode=RGB size=32x32>, 1, 'automobile')
1.3 Dataset变换
引入torchvision.transforms模块,将PIL图像变换为PyTorch张量。
from torchvision import transforms
to_tensor = transforms.ToTensor()
img_t = to_tensor(img) # convert the PIL image to a (C, H, W) float tensor scaled to [0, 1]
img_t.shape
输出:
torch.Size([3, 32, 32])
我们可以将变换直接作为参数传递给datasets.CIFAR10。
# Passing the transform here makes indexing return tensors instead of PIL images.
tensor_cifar10 = datasets.CIFAR10(data_path, train=True, download=False,
transform=transforms.ToTensor())
此时访问数据集的元素将返回一个张量,而不是PIL图像
img_t, _ = tensor_cifar10[99]
type(img_t)
img_t.shape, img_t.dtype
img_t.min(), img_t.max() # the raw PIL image holds 0-255 values; ToTensor converts to per-channel 32-bit floats scaled to 0-1
分别输出:
torch.Tensor
(torch.Size([3, 32, 32]), torch.float32)
(tensor(0.), tensor(1.))
plt.imshow(img_t.permute(1, 2, 0)) # permute() reorders axes from (C, H, W) to the (H, W, C) layout Matplotlib expects
plt.show()
输出:
1.4 数据归一化
通过计算每个通道的平均值和标准差使每个通道的均值为0、标准差为1。
# Stack all training images along a new 4th axis to compute per-channel
# statistics. NOTE(review): this materializes the entire dataset in memory.
imgs = torch.stack([img_t for img_t, _ in tensor_cifar10], dim=3)
imgs.shape
imgs.view(3, -1).mean(dim=1) # per-channel mean over all pixels of all images
imgs.view(3, -1).std(dim=1) # per-channel standard deviation
分别输出:
torch.Size([3, 32, 32, 50000])
tensor([0.4914, 0.4822, 0.4465])
tensor([0.2470, 0.2435, 0.2616])
# Normalize with the per-channel mean/std computed above so each channel has
# zero mean and unit standard deviation.
# fixed: the mean constants now match the values actually computed above
# (0.4914, 0.4822, 0.4465); the original used slightly different numbers.
transformed_cifar10 = datasets.CIFAR10(
    data_path, train=True, download=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2470, 0.2435, 0.2616))
    ]))
# The validation set reuses the training-set statistics: normalization
# constants must never be computed from validation data.
transformed_cifar10_val = datasets.CIFAR10(
    data_path, train=False, download=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2470, 0.2435, 0.2616))
    ]))
这时,从数据集绘制的图像不能为我们提供实际图像的真实表示
img_t, _ = transformed_cifar10[99]
plt.imshow(img_t.permute(1, 2, 0))
plt.show()
归一化后得到的红色汽车图像如下图所示。
2. 区分CIFAR-10图像数据集中的鸟和飞机
2.1 构建数据集
# Build a two-class subset: airplane (label 0 -> 0) and bird (label 2 -> 1).
# fixed: the subset is taken from the *normalized tensor* dataset rather than
# the raw `cifar10` -- the raw dataset yields PIL images, and the training
# code below calls img.view(-1), which only works on tensors.
label_map = {0: 0, 2: 1}
class_names = ['airplane', 'bird']
cifar2 = [(img, label_map[label])
          for img, label in transformed_cifar10
          if label in [0, 2]]
cifar2_val = [(img, label_map[label])
              for img, label in transformed_cifar10_val
              if label in [0, 2]]
2.2 构建一个全连接模型
import torch
import torch.nn as nn
import torch.optim as optim

# Fully connected classifier: 32*32*3 = 3072 inputs -> 2 log-probabilities.
model = nn.Sequential(
    nn.Linear(3072, 512),
    nn.Tanh(),
    nn.Linear(512, 2),
    nn.LogSoftmax(dim=1))

learning_rate = 1e-2
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
loss_fn = nn.NLLLoss()  # expects log-probabilities, hence the LogSoftmax above

n_epochs = 100
for epoch in range(n_epochs):
    # fixed: the loop bodies below had lost their indentation in the original.
    # One sample at a time (batch size 1) -- slow and noisy; see the
    # mini-batch version further down.
    for img, label in cifar2:
        out = model(img.view(-1).unsqueeze(0))  # flatten to shape (1, 3072)
        loss = loss_fn(out, torch.tensor([label]))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print("Epoch: %d, Loss: %f" % (epoch, float(loss)))
进行小批量训练
import torch
import torch.nn as nn
import torch.optim as optim

# shuffle=True re-shuffles the dataset at the start of every epoch.
# fixed: the original created this DataLoader twice; one construction suffices.
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64,
                                           shuffle=True)

model = nn.Sequential(
    nn.Linear(3072, 512),
    nn.Tanh(),
    nn.Linear(512, 2),
    nn.LogSoftmax(dim=1))

learning_rate = 1e-2
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
loss_fn = nn.NLLLoss()

n_epochs = 100
for epoch in range(n_epochs):
    # fixed: the loop bodies below had lost their indentation in the original
    for imgs, labels in train_loader:
        # flatten each image in the batch to a (batch, 3072) matrix
        outputs = model(imgs.view(imgs.shape[0], -1))
        loss = loss_fn(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print("Epoch: %d, Loss: %f" % (epoch, float(loss)))
在训练集和验证集上计算分类的准确性
# Accuracy on the training set; no shuffling is needed for evaluation.
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64,
                                           shuffle=False)
correct = 0
total = 0
with torch.no_grad():  # gradients are not needed for evaluation
    # fixed: the loop body below had lost its indentation in the original
    for imgs, labels in train_loader:
        outputs = model(imgs.view(imgs.shape[0], -1))
        _, predicted = torch.max(outputs, dim=1)  # index of the highest score
        total += labels.shape[0]
        correct += int((predicted == labels).sum())
print("Accuracy: %f" % (correct / total))
输出:
Accuracy: 0.997700
# Accuracy on the validation set.
val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=64,
                                         shuffle=False)
correct = 0
total = 0
with torch.no_grad():  # gradients are not needed for evaluation
    # fixed: the loop body below had lost its indentation in the original
    for imgs, labels in val_loader:
        outputs = model(imgs.view(imgs.shape[0], -1))
        _, predicted = torch.max(outputs, dim=1)  # index of the highest score
        total += labels.shape[0]
        correct += int((predicted == labels).sum())
print("Accuracy: %f" % (correct / total))
输出:
# Accuracy on the validation set.
# NOTE(review): this cell is a duplicate of the one directly above; it was
# likely pasted twice when the transcript was assembled.
val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=64,
                                         shuffle=False)
correct = 0
total = 0
with torch.no_grad():  # gradients are not needed for evaluation
    # fixed: the loop body below had lost its indentation in the original
    for imgs, labels in val_loader:
        outputs = model(imgs.view(imgs.shape[0], -1))
        _, predicted = torch.max(outputs, dim=1)
        total += labels.shape[0]
        correct += int((predicted == labels).sum())
print("Accuracy: %f" % (correct / total))
输出:
Accuracy: 0.821000
用nn.CrossEntropyLoss()代替nn.NLLLoss(),更改全连接模型。
import torch
import torch.nn as nn
import torch.optim as optim

train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64,
                                           shuffle=True)

# Deeper fully connected model. There is no LogSoftmax at the end:
# nn.CrossEntropyLoss combines LogSoftmax and NLLLoss internally, so the
# model outputs raw scores (logits).
model = nn.Sequential(
    nn.Linear(3072, 1024),
    nn.Tanh(),
    nn.Linear(1024, 512),
    nn.Tanh(),
    nn.Linear(512, 128),
    nn.Tanh(),
    nn.Linear(128, 2))

learning_rate = 1e-2
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()

n_epochs = 100
for epoch in range(n_epochs):
    # fixed: the loop bodies below had lost their indentation in the original
    for imgs, labels in train_loader:
        outputs = model(imgs.view(imgs.shape[0], -1))
        loss = loss_fn(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # fixed: the original print() was missing its closing parenthesis
    print("Epoch: %d, Loss: %f" % (epoch, float(loss)))
PyTorch的nn.Module的parameters()方法提供了一种快速确定模型有多少个参数的方法:对每个参数张量调用numel()得到其元素个数,再把各分量加起来即可得到参数总数。
# Count trainable parameters: numel() gives the element count of each
# parameter tensor; summing them yields the model's total parameter count.
numel_list = [param.numel()
              for param in model.parameters()
              if param.requires_grad]
sum(numel_list), numel_list
输出:
(3737474, [3145728, 1024, 524288, 512, 65536, 128, 256, 2])
2.3 构建一个卷积模型
由于参数量巨大,利用全连接层进行图像分类是不现实的,接下来将会使用卷积层来构建分类模型。
class Net(nn.Module):
    """Small CNN for 2-class CIFAR: two conv/tanh/maxpool stages, then an MLP head.

    Input: (N, 3, 32, 32) image batch. Output: (N, 2) class scores.
    """
    # fixed: the class body below had lost its indentation in the original

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)   # 32x32, 3 -> 16 channels
        self.act1 = nn.Tanh()
        self.pool1 = nn.MaxPool2d(2)                              # 32x32 -> 16x16
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)   # 16x16, 16 -> 8 channels
        self.act2 = nn.Tanh()
        self.pool2 = nn.MaxPool2d(2)                              # 16x16 -> 8x8
        self.fc1 = nn.Linear(8 * 8 * 8, 32)
        self.act3 = nn.Tanh()
        self.fc2 = nn.Linear(32, 2)

    def forward(self, x):
        out = self.pool1(self.act1(self.conv1(x)))
        out = self.pool2(self.act2(self.conv2(out)))
        out = out.view(-1, 8 * 8 * 8)  # flatten the (N, 8, 8, 8) feature map
        out = self.act3(self.fc1(out))
        out = self.fc2(out)
        return out
model = Net()
# ~18k parameters for the CNN -- far fewer than the ~3.7M of the fully
# connected model above.
numel_list = [p.numel() for p in model.parameters()]
sum(numel_list), numel_list
输出:
(18090, [432, 16, 1152, 8, 16384, 32, 64, 2])
首先,具体查看输入图像经过网络中的一个二维卷积层后产生的输出:输出仍是一个二维图像,其每个像素都是输入图像对应邻域像素的加权和。
img, _ = cifar2[0]
# A single conv layer: 3 input channels -> 16 output channels, 3x3 kernel;
# padding=1 keeps the 32x32 spatial size.
conv = nn.Conv2d(3, 16, kernel_size=3, padding=1)
output = conv(img.unsqueeze(0))  # unsqueeze adds the batch dimension
img.unsqueeze(0).shape, output.shape
输出:
(torch.Size([1, 3, 32, 32]), torch.Size([1, 16, 32, 32]))
画出输入图像和输出图像:
然后,查看一下经过最大池化层图像形状的变换。
pool = nn.MaxPool2d(2)  # 2x2 max pooling halves each spatial dimension
output = pool(img.unsqueeze(0))
img.unsqueeze(0).shape, output.shape
输出:
(torch.Size([1, 3, 32, 32]), torch.Size([1, 3, 16, 16]))
最后,查看一下图像经过整个网络后的输出。
model = Net()
model(img.unsqueeze(0))  # untrained network: the two output scores are essentially random
输出:
tensor([[-0.0157, 0.1143]], grad_fn=<AddmmBackward>)
换一种定义网络的形式,利用函数式API来定义网络。
import torch.nn.functional as F

class Net(nn.Module):
    """Same CNN as above, but pooling and activations use the functional API.

    Only layers holding parameters (conv, linear) need to be registered as
    submodules; stateless ops are called as functions in forward().
    """
    # fixed: the class body below had lost its indentation in the original

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(8 * 8 * 8, 32)
        self.fc2 = nn.Linear(32, 2)

    def forward(self, x):
        out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
        out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
        out = out.view(-1, 8 * 8 * 8)
        out = torch.tanh(self.fc1(out))
        out = self.fc2(out)
        return out
开始在GPU训练
# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Training on device {device}.")
import datetime

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader, val_loader):
    """Train `model` for n_epochs, printing average train/val loss every 10 epochs.

    Tensors are moved to the module-level `device` before each forward pass.
    Gradients are updated only on the training loop; the validation pass runs
    under torch.no_grad().
    """
    for epoch in range(n_epochs):
        loss_train = 0.0
        loss_val = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)
            labels = labels.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_train += loss.item()
        # fixed: a stray optimizer.zero_grad() between the loops was removed;
        # each training step already zeroes the gradients.
        with torch.no_grad():  # validation needs no gradient tracking
            # fixed: the original unpacked `label_val` but then used
            # `labels_val`, raising NameError on the first epoch
            for imgs_val, labels_val in val_loader:
                imgs_val = imgs_val.to(device=device)
                labels_val = labels_val.to(device=device)
                outputs_val = model(imgs_val)
                loss_batch_val = loss_fn(outputs_val, labels_val)
                loss_val += loss_batch_val.item()
        if epoch == 0 or (epoch + 1) % 10 == 0:
            print('{} Epoch {}, Training loss {},Val loss{}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader),
                loss_val / len(val_loader)))
# Assemble the loaders, model, optimizer, and loss, then launch training.
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64, shuffle=True)
val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=64, shuffle=False)
model = Net().to(device=device)  # move parameters to the same device as the inputs
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()  # model emits raw logits, matching this loss
training_loop(
n_epochs = 100,
optimizer = optimizer,
model = model,
loss_fn = loss_fn,
train_loader = train_loader,
val_loader = val_loader,
)