训练一个分类器
之前的文章中已经介绍了Pytorch的基本内容,这次将会进入实战,尝试训练一个分类器并进行测试。
开始之前
Python提供了处理各种数据的package,如处理图片时可以使用Pillow、OpenCV等;处理音频时可以使用scipy、librosa等;处理文本时可以使用基于Python或Cython的原始加载或者选择NLTK、Spacy等。
但Pytorch针对视觉处理,专门创建了一个名为torchvision的package,其中包含用于常见数据集(如Imagenet,CIFAR10,MNIST等)的数据加载器(即torchvision.datasets和torch.utils.data.DataLoader),以及用于图像的数据转换器(torchvision.transforms),这为我们带来了极大的便利。
我们将使用CIFAR10数据集, 它具有以下类别:“飞机”,“汽车”,“鸟”,“猫”,“鹿”,“狗”,“青蛙”,“马”,“船”,“卡车”。 CIFAR-10中的图像尺寸为3x32x32,即尺寸为32x32像素的3通道彩色图像。
训练分类器的步骤
我们将按以下步骤完成分类器的训练:
1.使用Torchvision加载和标准化CIFAR10训练和测试数据集
2.定义卷积神经网络
3.定义损失函数
4.根据训练数据训练网络
5.在测试数据上测试网络
数据集下载
import torch
import torchvision
import torchvision.transforms as transforms

# Convert PIL images to tensors, then map each channel from [0, 1] to [-1, 1]
# via Normalize(mean=0.5, std=0.5) per channel.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Training split: downloaded into ./data on first run, shuffled each epoch.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=4, shuffle=True, num_workers=2)

# Test split: fixed order, no shuffling.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False, num_workers=2)

# Human-readable names for the ten CIFAR10 label indices (order matters).
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
可以运行上述代码在官方地址下载,但速度较慢,大约需要几个小时。这里给一个百度云,下载后在项目目录新建一个data目录解压进去。
链接:CIFAR10数据集下载地址
提取码:o6uc
导入相关包以及加载数据集
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import time
import copy
import torch.optim as optim

# Mini-batch size: a large dataset is consumed a few images at a time.
MINI_BATCH = 8
# Train on the first CUDA device when one is present, otherwise on the CPU.
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# ToTensor scales pixels to [0, 1]; Normalize(mean, std) then shifts each
# channel to [-1, 1] using mean=0.5 and std=0.5 per channel.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Training data (assumed already downloaded into ./data, hence download=False).
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=False, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=MINI_BATCH, shuffle=True, num_workers=4)

# Test data, evaluated in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=False, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False, num_workers=4)
注意:如果后续训练在Windows下运行时出现BrokenPipeError错误,将上述代码中DataLoader函数的num_workers参数设置为0。
定义卷积神经网络
这里将新建一个net.py文件来定义网络模型。
# net.py
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """A small LeNet-style CNN for 3x32x32 CIFAR10 images.

    Architecture: two conv+pool stages followed by three fully
    connected layers mapping 16*5*5 features to 10 class scores.
    """

    def __init__(self):
        super(Net, self).__init__()
        # conv1: 3 input channels -> 6 output channels, 5x5 kernel
        self.conv1 = nn.Conv2d(3, 6, 5)
        # conv2: 6 input channels -> 16 output channels, 5x5 kernel
        self.conv2 = nn.Conv2d(6, 16, 5)
        # shared 2x2 max pooling with stride 2, applied after each conv
        self.fool = None  # placeholder removed below; pooling layer follows
        self.pool = nn.MaxPool2d(2, 2)
        # fully connected head: 16*5*5 -> 120 -> 84 -> 10
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Forward pass; returns a (batch, 10) tensor of class scores."""
        # Stage 1: conv -> ReLU -> pool (32x32 -> 28x28 -> 14x14)
        x = self.pool(F.relu(self.conv1(x)))
        # Stage 2: conv -> ReLU -> pool (14x14 -> 10x10 -> 5x5)
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten all feature maps into one vector per image
        x = x.view(-1, 16 * 5 * 5)
        # Classifier head with ReLU on the hidden layers only
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
该网络包含两个卷积层,每个卷积层后接一个池化层,之后是三个全连接层。最终输出的张量维度为(1*10),表示图片属于十个类别的得分(logits,未经softmax归一化,并非真正的概率)。
定义训练函数,得到最优参数
回到之前的文件,继续定义模型训练函数。注意此时应导入net文件中的Net:
from net import Net
然后定义训练函数train:
def train(model, criterion, optimizer, epochs):
    """Train `model` on the global trainloader and keep the best weights.

    After every epoch the model is evaluated on the global testloader;
    the parameters with the highest test accuracy seen so far are
    remembered and loaded back into `model` before returning.

    Args:
        model: the network to train (already moved to DEVICE).
        criterion: loss function, e.g. nn.CrossEntropyLoss().
        optimizer: optimizer bound to model.parameters().
        epochs: number of passes over the training set.

    Returns:
        The same model object, carrying the best-scoring weights.
    """
    since = time.time()
    best_acc = 0.0  # highest test accuracy observed so far
    best_model_wts = copy.deepcopy(model.state_dict())  # weights at that accuracy
    for epoch in range(epochs):
        print('-' * 30)
        print('Epoch {}/{}'.format(epoch + 1, epochs))
        # --- training phase ---
        model.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader):
            inputs, labels = data
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            # Forward pass through the model argument (the original code
            # accidentally used the global `net` here instead of `model`).
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # Backward pass and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            # Print the average loss every 1000 steps
            if (0 != i) and (0 == i % 1000):
                print('step:{:d}, loss:{:.3f}'.format(i, running_loss / 1000))
                running_loss = 0.0
        # --- evaluation phase: overall accuracy on the test set ---
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader:
                images, labels = data
                images, labels = images.to(DEVICE), labels.to(DEVICE)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        acc = correct / total
        if acc > best_acc:  # remember the best weights seen so far
            best_acc = acc
            best_model_wts = copy.deepcopy(model.state_dict())
    time_elapsed = time.time() - since
    print('-' * 30)
    print('训练用时:{:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    # Restore and return the best weights found during training
    model.load_state_dict(best_model_wts)
    return model
开始训练模型
定义好损失函数及优化器,然后调用train函数即可:
# Cross-entropy loss with momentum SGD as the optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Run ten epochs; train() returns the best-performing weights.
net = train(net, criterion, optimizer, 10)

# Persist only the parameters (state_dict), not the whole module.
torch.save(net.state_dict(), 'net_dict.pt')
训练结果如下,整个过程会耗费几分钟的时间。
测试模型
我们将新建一个predict.py文件进行模型的测试。
首先导入测试数据并加载模型:
# predict.py
import torch
import torchvision
import torchvision.transforms as transforms
from net import Net
import matplotlib.pyplot as plt
import numpy as np
# Index -> name mapping for the ten CIFAR10 classes (order matters).
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Must match the normalization used at training time.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
# Test split only; the data is assumed to be present under ./data.
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=0)
net = Net()
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
net.load_state_dict(torch.load('net_dict.pt', map_location=DEVICE))
net.to(DEVICE)
然后定义predict函数进行预测:
def predict():
    """Report test-set accuracy: overall first, then broken down per class."""
    # --- overall accuracy ---
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('整体准确率:{}%'.format(100 * correct / total))
    print('=' * 30)
    # --- per-class accuracy ---
    class_correct = list(0. for i in range(10))
    class_total = list(0. for i in range(10))
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            # Use DEVICE consistently (the original mixed .cuda() in here).
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            # Comparison is already 1-D; the original .squeeze() would turn a
            # batch of size 1 into a 0-dim tensor and break the indexing below.
            c = (predicted == labels)
            # Iterate over the actual batch size instead of a hard-coded 4,
            # so a short final batch cannot raise an IndexError.
            for i in range(labels.size(0)):
                label = labels[i]
                class_correct[label] += c[i].item()
                class_total[label] += 1
    for i in range(10):
        print('{}的准确率:{:.2f}%'.format(classes[i], 100 * class_correct[i] / class_total[i]))
在主程序中调用该函数,可得如下运行结果(准确度可能略有差异):
展示部分预测样例
我们可以输出几张测试集的图片以及它们的标签和预测结果。
首先定义一个函数来展示图片:
def imshow(img):
    """Display a [c, h, w] image tensor that was normalized to [-1, 1]."""
    # Undo Normalize((0.5, ...), (0.5, ...)): x / 2 + 0.5 maps back to [0, 1].
    unnormalized = img / 2 + 0.5
    # Matplotlib expects channels last: [c, h, w] -> [h, w, c].
    plt.imshow(np.transpose(unnormalized.numpy(), (1, 2, 0)))
    plt.show()
然后定义一个函数输出几个例子:
def show_example():
    """Show one batch of test images with their true and predicted labels."""
    # Grab a single batch from the test loader.
    testdata = iter(testloader)
    # Use the builtin next(); the iterator .next() method was removed in Python 3.
    images, labels = next(testdata)
    imshow(torchvision.utils.make_grid(images))
    print('真实类别:', ' '.join('{}'.format(classes[labels[j]]) for j in range(4)))
    # The network outputs one score per class; take the argmax as the prediction.
    if torch.cuda.is_available():
        images, labels = images.cuda(), labels.cuda()
    outputs = net(images)
    _, predicted = torch.max(outputs, 1)
    # Print the predicted classes (the original re-printed `labels` here,
    # so the "prediction" row always matched the ground truth).
    print('预测结果:', ' '.join('{}'.format(classes[predicted[j]]) for j in range(4)))
在主程序中调用show_example函数,可得到如下结果:
这样一个分类器就训练完成了,可以尝试修改各参数或更改网络模型结构来得到更好的结果。
完整代码
net.py
# net.py
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """LeNet-style CNN: two conv/pool stages plus a three-layer MLP head,
    mapping a 3x32x32 image to 10 class scores."""

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)        # 3 -> 6 channels, 5x5 kernel
        self.conv2 = nn.Conv2d(6, 16, 5)       # 6 -> 16 channels, 5x5 kernel
        self.pool = nn.MaxPool2d(2, 2)         # 2x2 max pool, stride 2 (shared)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # flattened features -> 120
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)           # 10 CIFAR10 classes

    def forward(self, x):
        """Forward pass: returns (batch, 10) unnormalized class scores."""
        x = self.pool(F.relu(self.conv1(x)))   # 32x32 -> conv 28x28 -> pool 14x14
        x = self.pool(F.relu(self.conv2(x)))   # 14x14 -> conv 10x10 -> pool 5x5
        x = x.view(-1, 16 * 5 * 5)             # flatten each image's feature maps
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
train.py
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import time
import copy
import torch.optim as optim
from net import Net
MINI_BATCH = 8  # mini-batch size: a large dataset is loaded a few images at a time
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')  # use the GPU when available
# Load the dataset with torchvision and normalize it.
# Normalize(mean, std): mean and standard deviation per color channel
# (note: the signature is mean first, then std).
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
# Training set (assumed already downloaded to ./data, hence download=False)
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=False, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=MINI_BATCH, shuffle=True, num_workers=0)
# Test set, evaluated in a fixed order
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=0)
net = Net()  # instantiate the network defined in net.py
net.to(DEVICE)  # move its parameters to the selected device
def train(model, criterion, optimizer, epochs):
    """Train `model` on the global trainloader and keep the best weights.

    After every epoch the model is evaluated on the global testloader;
    the parameters with the highest test accuracy seen so far are
    remembered and loaded back into `model` before returning.

    Args:
        model: the network to train (already moved to DEVICE).
        criterion: loss function, e.g. nn.CrossEntropyLoss().
        optimizer: optimizer bound to model.parameters().
        epochs: number of passes over the training set.

    Returns:
        The same model object, carrying the best-scoring weights.
    """
    since = time.time()
    best_acc = 0.0  # highest test accuracy observed so far
    best_model_wts = copy.deepcopy(model.state_dict())  # weights at that accuracy
    for epoch in range(epochs):
        print('-' * 30)
        print('Epoch {}/{}'.format(epoch + 1, epochs))
        # --- training phase ---
        model.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader):
            inputs, labels = data
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            # Forward pass through the model argument (the original code
            # accidentally used the global `net` here instead of `model`).
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # Backward pass and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            # Print the average loss every 1000 steps
            if (0 != i) and (0 == i % 1000):
                print('step:{:d}, loss:{:.3f}'.format(i, running_loss / 1000))
                running_loss = 0.0
        # --- evaluation phase: overall accuracy on the test set ---
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader:
                images, labels = data
                images, labels = images.to(DEVICE), labels.to(DEVICE)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        acc = correct / total
        if acc > best_acc:  # remember the best weights seen so far
            best_acc = acc
            best_model_wts = copy.deepcopy(model.state_dict())
    time_elapsed = time.time() - since
    print('-' * 30)
    print('训练用时:{:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    # Restore and return the best weights found during training
    model.load_state_dict(best_model_wts)
    return model
# Cross-entropy loss with momentum SGD as the optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Run ten epochs; train() returns the best-performing weights.
net = train(net, criterion, optimizer, 10)

# Persist only the parameters (state_dict), not the whole module.
torch.save(net.state_dict(), 'net_dict.pt')
predict.py
# predict.py
import torch
import torchvision
import torchvision.transforms as transforms
from net import Net
import matplotlib.pyplot as plt
import numpy as np
# Index -> name mapping for the ten CIFAR10 classes (order matters).
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Must match the normalization used at training time.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
# Test split only; the data is assumed to be present under ./data.
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=0)
net = Net()
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
net.load_state_dict(torch.load('net_dict.pt', map_location=DEVICE))
net.to(DEVICE)
def predict():
    """Report test-set accuracy: overall first, then broken down per class."""
    # --- overall accuracy ---
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('整体准确率:{}%'.format(100 * correct / total))
    print('=' * 30)
    # --- per-class accuracy ---
    class_correct = list(0. for i in range(10))
    class_total = list(0. for i in range(10))
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            # Use DEVICE consistently (the original mixed .cuda() in here).
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            # Comparison is already 1-D; the original .squeeze() would turn a
            # batch of size 1 into a 0-dim tensor and break the indexing below.
            c = (predicted == labels)
            # Iterate over the actual batch size instead of a hard-coded 4,
            # so a short final batch cannot raise an IndexError.
            for i in range(labels.size(0)):
                label = labels[i]
                class_correct[label] += c[i].item()
                class_total[label] += 1
    for i in range(10):
        print('{}的准确率:{:.2f}%'.format(classes[i], 100 * class_correct[i] / class_total[i]))
def imshow(img):
    """Display a [c, h, w] image tensor that was normalized to [-1, 1]."""
    # Undo Normalize((0.5, ...), (0.5, ...)): x / 2 + 0.5 maps back to [0, 1].
    unnormalized = img / 2 + 0.5
    # Matplotlib expects channels last: [c, h, w] -> [h, w, c].
    plt.imshow(np.transpose(unnormalized.numpy(), (1, 2, 0)))
    plt.show()
def show_example():
    """Show one batch of test images with their true and predicted labels."""
    # Grab a single batch from the test loader.
    testdata = iter(testloader)
    # Use the builtin next(); the iterator .next() method was removed in Python 3.
    images, labels = next(testdata)
    imshow(torchvision.utils.make_grid(images))
    print('真实类别:', ' '.join('{}'.format(classes[labels[j]]) for j in range(4)))
    # The network outputs one score per class; take the argmax as the prediction.
    if torch.cuda.is_available():
        images, labels = images.cuda(), labels.cuda()
    outputs = net(images)
    _, predicted = torch.max(outputs, 1)
    # Print the predicted classes (the original re-printed `labels` here,
    # so the "prediction" row always matched the ground truth).
    print('预测结果:', ' '.join('{}'.format(classes[predicted[j]]) for j in range(4)))
predict() # evaluate overall and per-class accuracy on the test set
show_example() # display a few sample predictions