Pytorch: 微调预训练好的卷积神经网络(VGG) 识别十类猴子
Copyright: Jingmin Wei, Pattern Recognition and Intelligent System, School of Artificial Intelligence, Huazhong University of Science and Technology
本教程不商用,仅供学习和参考交流使用,如需转载,请联系本人。
猴子的数据地址为:https://www.kaggle.com/slothkong/10-monkey-species 。其中包含了训练集和验证集
微调预训练的VGG16网络
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import hiddenlayer as hl
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
from torchvision import models
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchsummary import summary
from torchviz import make_dot
# Select the device for the model: GPU when available, else CPU fallback.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
# NOTE(review): the two calls below are CUDA-only diagnostics;
# get_device_name(0) raises on a CPU-only machine — confirm intended.
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))
cuda
1
GeForce MX250
冻结特征提取层参数,不更新权重,以提高网络训练速度
# Load the ImageNet-pretrained VGG16 and keep only its convolutional
# feature-extraction stack (everything before the original classifier).
vgg16 = models.vgg16(pretrained=True)
vgg = vgg16.features
# Freeze every feature-extractor weight: they will not receive gradients,
# which speeds up fine-tuning — only the new head is trained.
for p in vgg.parameters():
    p.requires_grad = False
在VGG16的基础上,设计全连接层,512、256、10。在前向传播函数中,由 self.classifier 得到输出
class MyVggModel(nn.Module):
    """VGG16-based classifier for the 10 monkey species.

    Reuses the frozen VGG16 convolutional features (module-level ``vgg``)
    and adds a trainable fully-connected head: 25088 -> 512 -> 256 -> 10.
    """

    def __init__(self):
        super(MyVggModel, self).__init__()
        # Frozen VGG16 feature extractor (weights are not updated).
        self.vgg = vgg
        # New trainable classification head.
        # BUG FIX: the final nn.Softmax(dim=1) was removed — the training
        # loop uses nn.CrossEntropyLoss, which applies log-softmax itself;
        # stacking Softmax in front of it squashes the gradients and slows
        # learning. The head now emits raw logits; argmax-based accuracy
        # is unchanged.
        self.classifier = nn.Sequential(
            nn.Linear(25088, 512),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(256, 10),
        )

    def forward(self, x):
        """Return class logits of shape (batch, 10) for a batch of images."""
        x = self.vgg(x)            # (batch, 512, 7, 7) for 224x224 input
        x = x.view(x.size(0), -1)  # flatten to (batch, 25088)
        return self.classifier(x)
# Instantiate the fine-tuning model and move it to the selected device.
Myvggc = MyVggModel().to(device)
print(Myvggc)
MyVggModel(
(vgg): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace=True)
(2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace=True)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(6): ReLU(inplace=True)
(7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(8): ReLU(inplace=True)
(9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace=True)
(12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): ReLU(inplace=True)
(14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(15): ReLU(inplace=True)
(16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(18): ReLU(inplace=True)
(19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(20): ReLU(inplace=True)
(21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(22): ReLU(inplace=True)
(23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(25): ReLU(inplace=True)
(26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(27): ReLU(inplace=True)
(28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(29): ReLU(inplace=True)
(30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(classifier): Sequential(
(0): Linear(in_features=25088, out_features=512, bias=True)
(1): ReLU()
(2): Dropout(p=0.5, inplace=False)
(3): Linear(in_features=512, out_features=256, bias=True)
(4): ReLU()
(5): Dropout(p=0.5, inplace=False)
(6): Linear(in_features=256, out_features=10, bias=True)
(7): Softmax(dim=1)
)
)
# Visualize the computation graph and print a layer-by-layer summary.
# BUG FIX: the probe tensor must live on the same device as the model —
# previously it was created on the CPU, so Myvggc(x) crashed whenever
# `device` was cuda. Also use `device` instead of the hard-coded .cuda()
# so the summary works on CPU-only machines too.
x = torch.randn(1, 3, 224, 224, device=device).requires_grad_(True)  # one (3, 224, 224) sample
y = Myvggc(x)
myvgg_vis = make_dot(y, params=dict(list(Myvggc.named_parameters()) + [('x', x)]))
myvgg_vis
summary(Myvggc, input_size=(3, 224, 224), device=device.type)
# To change the input image size, propagate the new spatial size through the
# conv/pool stack and update the first Linear layer's in_features (25088).
# Run summary first to confirm the architecture is consistent.
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 224, 224] 1,792
ReLU-2 [-1, 64, 224, 224] 0
Conv2d-3 [-1, 64, 224, 224] 36,928
ReLU-4 [-1, 64, 224, 224] 0
MaxPool2d-5 [-1, 64, 112, 112] 0
Conv2d-6 [-1, 128, 112, 112] 73,856
ReLU-7 [-1, 128, 112, 112] 0
Conv2d-8 [-1, 128, 112, 112] 147,584
ReLU-9 [-1, 128, 112, 112] 0
MaxPool2d-10 [-1, 128, 56, 56] 0
Conv2d-11 [-1, 256, 56, 56] 295,168
ReLU-12 [-1, 256, 56, 56] 0
Conv2d-13 [-1, 256, 56, 56] 590,080
ReLU-14 [-1, 256, 56, 56] 0
Conv2d-15 [-1, 256, 56, 56] 590,080
ReLU-16 [-1, 256, 56, 56] 0
MaxPool2d-17 [-1, 256, 28, 28] 0
Conv2d-18 [-1, 512, 28, 28] 1,180,160
ReLU-19 [-1, 512, 28, 28] 0
Conv2d-20 [-1, 512, 28, 28] 2,359,808
ReLU-21 [-1, 512, 28, 28] 0
Conv2d-22 [-1, 512, 28, 28] 2,359,808
ReLU-23 [-1, 512, 28, 28] 0
MaxPool2d-24 [-1, 512, 14, 14] 0
Conv2d-25 [-1, 512, 14, 14] 2,359,808
ReLU-26 [-1, 512, 14, 14] 0
Conv2d-27 [-1, 512, 14, 14] 2,359,808
ReLU-28 [-1, 512, 14, 14] 0
Conv2d-29 [-1, 512, 14, 14] 2,359,808
ReLU-30 [-1, 512, 14, 14] 0
MaxPool2d-31 [-1, 512, 7, 7] 0
Linear-32 [-1, 512] 12,845,568
ReLU-33 [-1, 512] 0
Dropout-34 [-1, 512] 0
Linear-35 [-1, 256] 131,328
ReLU-36 [-1, 256] 0
Dropout-37 [-1, 256] 0
Linear-38 [-1, 10] 2,570
Softmax-39 [-1, 10] 0
================================================================
Total params: 27,694,154
Trainable params: 12,979,466
Non-trainable params: 14,714,688
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 218.41
Params size (MB): 105.64
Estimated Total Size (MB): 324.63
----------------------------------------------------------------
准备新网络需要的数据
训练集需要随机裁剪和水平翻转。
验证集需要重置分辨率和按尺寸中心裁剪。
最后二者都要进行标准化处理。
# Preprocessing pipelines for the 10-monkey-species dataset.
# Both pipelines end with the ImageNet normalization statistics that the
# pretrained VGG16 expects.
_normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                  [0.229, 0.224, 0.225])

# Training set: random augmentation (crop + horizontal flip).
train_data_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),   # random crop to 224x224
    transforms.RandomHorizontalFlip(),   # flip with probability 0.5
    transforms.ToTensor(),               # to tensor, scaled into [0, 1]
    _normalize,
])

# Validation set: deterministic resize then center crop.
val_data_transforms = transforms.Compose([
    transforms.Resize(256),              # rescale the shorter side to 256
    transforms.CenterCrop(224),          # crop the central 224x224 region
    transforms.ToTensor(),               # to tensor, scaled into [0, 1]
    _normalize,
])
数据读取部分如下:
# Training images (ImageFolder expects one subfolder per class, hence the
# nested training/training path).
train_data_dir = './data/10-monkey-species/training/training'
# BUG FIX: the training set previously used val_data_transforms, so the
# random-crop / horizontal-flip augmentation defined above was never applied.
train_data = ImageFolder(train_data_dir, transform=train_data_transforms)
train_data_loader = Data.DataLoader(train_data, batch_size=16,
                                    shuffle=True, num_workers=2)
# Validation images, with the deterministic resize + center-crop pipeline.
val_data_dir = './data/10-monkey-species/validation/validation'
val_data = ImageFolder(val_data_dir, transform=val_data_transforms)
val_data_loader = Data.DataLoader(val_data, batch_size=16,
                                  shuffle=True, num_workers=2)
print('训练集样本数', len(train_data.targets))
print('验证集样本数', len(val_data.targets))
训练集样本数 1097
验证集样本数 272
# Print the class-name -> integer-label mapping for both splits
# (they must match for accuracy to be meaningful).
print(train_data.class_to_idx)
print(val_data.class_to_idx)
{'n0': 0, 'n1': 1, 'n2': 2, 'n3': 3, 'n4': 4, 'n5': 5, 'n6': 6, 'n7': 7, 'n8': 8, 'n9': 9}
{'n0': 0, 'n1': 1, 'n2': 2, 'n3': 3, 'n4': 4, 'n5': 5, 'n6': 6, 'n7': 7, 'n8': 8, 'n9': 9}
使用 matplotlib 可视化部分测试集:
# Pull one batch from the training loader for visualization.
for batch_idx, (b_x, b_y) in enumerate(train_data_loader):
    if batch_idx > 0:
        break
# Undo the ImageNet normalization so the images render with natural colors.
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
plt.figure(figsize=(12, 6))
for idx, label in enumerate(b_y):
    plt.subplot(4, 8, idx + 1)
    # CHW tensor -> HWC numpy image, de-normalize, clamp into [0, 1]
    image = b_x[idx].numpy().transpose((1, 2, 0))
    image = np.clip(std * image + mean, 0, 1)
    plt.imshow(image)
    plt.title(label.data.numpy())
    plt.axis('off')
plt.subplots_adjust(hspace=0.3)
微调网络的训练和预测
使用 Adam 优化器,交叉熵损失,使用 HiddenLayer 库可视化训练集和验证集的表现。
# Optimizer over the trainable parameters only (the new classifier head).
# FIX: previously all parameters were passed in, including the frozen VGG
# features; filtering keeps Adam's state restricted to what actually trains.
optimizer = optim.Adam(
    (p for p in Myvggc.parameters() if p.requires_grad), lr=0.0003)
# Cross-entropy loss (expects raw logits, applies log-softmax internally).
loss_func = nn.CrossEntropyLoss().to(device)
# Metric history and live-plot canvas from HiddenLayer.
historyl = hl.History()
canvasl = hl.Canvas()
# Fine-tune for 10 epochs over the full training set.
for epoch in range(10):
    train_loss_epoch = 0
    val_loss_epoch = 0
    train_corrects = 0
    val_corrects = 0
    # ---- training phase ----
    Myvggc.train()
    for step, (b_x, b_y) in enumerate(train_data_loader):
        b_x, b_y = b_x.to(device), b_y.to(device)
        output = Myvggc(b_x)               # predictions for this batch
        loss = loss_func(output, b_y)      # loss against the labels
        pre_lab = torch.argmax(output, 1)  # predicted class indices
        optimizer.zero_grad()              # clear accumulated gradients
        loss.backward()                    # backpropagate the loss
        optimizer.step()                   # update the classifier weights
        train_loss_epoch += loss.item() * b_x.size(0)
        train_corrects += torch.sum(pre_lab == b_y.data)
    # Per-epoch training loss and accuracy.
    train_loss = train_loss_epoch / len(train_data.targets)
    train_acc = train_corrects.double() / len(train_data.targets)
    # ---- validation phase ----
    # (optional speed-up: validate only every few epochs instead of each one)
    Myvggc.eval()
    # FIX: no_grad — validation needs no autograd graph; without it every
    # forward pass stored activations for a backward that never happens.
    with torch.no_grad():
        for step, (val_x, val_y) in enumerate(val_data_loader):
            val_x, val_y = val_x.to(device), val_y.to(device)
            output = Myvggc(val_x)
            loss = loss_func(output, val_y)
            pre_lab = torch.argmax(output, 1)
            val_loss_epoch += loss.item() * val_x.size(0)
            val_corrects += torch.sum(pre_lab == val_y.data)
    # Per-epoch validation loss and accuracy.
    val_loss = val_loss_epoch / len(val_data.targets)
    val_acc = val_corrects.double() / len(val_data.targets)
    # Log and plot this epoch's metrics.
    historyl.log(epoch, train_loss=train_loss,
                 val_loss=val_loss,
                 train_acc=train_acc.item(),
                 val_acc=val_acc.item())
    with canvasl:
        canvasl.draw_plot([historyl['train_loss'], historyl['val_loss']])
        canvasl.draw_plot([historyl['train_acc'], historyl['val_acc']])
        canvasl.save('iter_visual.png')
可以根据训练过程看损失函数和识别精度的变化情况。
# Persist the fine-tuned model to disk.
# NOTE(review): this pickles the entire model object, which ties the file to
# this exact class/module layout; saving Myvggc.state_dict() is the more
# portable convention — consider switching.
model = Myvggc
torch.save(model, 'mycnn.pkl')  # save the model