PyTorch: Convolutional Neural Networks - Fine-Tuning a Pretrained Network

PyTorch: fine-tuning a pretrained convolutional neural network (VGG) to recognize ten monkey species

Copyright: Jingmin Wei, Pattern Recognition and Intelligent System, School of Artificial Intelligence, Huazhong University of Science and Technology

PyTorch tutorial column link


This tutorial is non-commercial and intended for learning and reference only; please contact the author before reposting.

The monkey dataset is available at https://www.kaggle.com/slothkong/10-monkey-species . It contains both a training set and a validation set.
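After unpacking, ImageFolder expects one subfolder per class (n0 through n9) inside each split. Below is a minimal sketch to check the layout, assuming the dataset was extracted to the ./data/10-monkey-species path used later in this tutorial:

import os

data_root = './data/10-monkey-species'  # assumed extraction path, matching the loaders below
for split in ('training/training', 'validation/validation'):
    split_dir = os.path.join(data_root, split)
    for cls in sorted(os.listdir(split_dir)):
        n_imgs = len(os.listdir(os.path.join(split_dir, cls)))
        print(f'{split}/{cls}: {n_imgs} images')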

Fine-tuning the pretrained VGG16 network
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import hiddenlayer as hl
import os

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
from torchvision import models
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchsummary import summary
from torchviz import make_dot
# Load the model on the GPU if one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))
cuda
1
GeForce MX250

Freeze the parameters of the feature-extraction layers so that their weights are not updated, which speeds up training.

# Load the pretrained VGG16 network
vgg16 = models.vgg16(pretrained=True)
# Take the feature-extraction layers of VGG16
vgg = vgg16.features
# Freeze the feature-extraction parameters so they are not updated
for param in vgg.parameters():
    param.requires_grad_(False)
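As a quick sanity check, the frozen parameters can be counted directly; the total should match the non-trainable figure reported by summary further below. A minimal sketch:

# Sketch: verify that every feature-extractor parameter is frozen
n_frozen = sum(p.numel() for p in vgg.parameters() if not p.requires_grad)
n_trainable = sum(p.numel() for p in vgg.parameters() if p.requires_grad)
print(n_frozen, n_trainable)  # expected: 14714688 frozen, 0 trainable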

On top of VGG16, design new fully connected layers of sizes 512, 256, and 10. In the forward function, the output is produced by self.classifier.

class MyVggModel(nn.Module):
    def __init__(self):
        super(MyVggModel, self).__init__()
        # Feature-extraction layers of VGG16 (frozen above)
        self.vgg = vgg
        # New fully connected classifier.
        # Note: nn.CrossEntropyLoss already applies log-softmax internally, so the
        # trailing Softmax is redundant and slightly weakens the gradients; it is
        # kept here so the code matches the printed structure and results below.
        self.classifier = nn.Sequential(nn.Linear(25088, 512),
                                        nn.ReLU(),
                                        nn.Dropout(p=0.5),
                                        nn.Linear(512, 256),
                                        nn.ReLU(),
                                        nn.Dropout(p=0.5),
                                        nn.Linear(256, 10),
                                        nn.Softmax(dim=1))
    def forward(self, x):
        # Forward pass: features -> flatten -> classifier
        x = self.vgg(x)
        x = x.view(x.size(0), -1)
        output = self.classifier(x)
        return output
Myvggc = MyVggModel().to(device)
print(Myvggc)
MyVggModel(
  (vgg): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (18): ReLU(inplace=True)
    (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (20): ReLU(inplace=True)
    (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (22): ReLU(inplace=True)
    (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (25): ReLU(inplace=True)
    (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (27): ReLU(inplace=True)
    (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (29): ReLU(inplace=True)
    (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_features=25088, out_features=512, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=512, out_features=256, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=256, out_features=10, bias=True)
    (7): Softmax(dim=1)
  )
)
# Visualize the network structure
x = torch.randn(1, 3, 224, 224).to(device).requires_grad_(True) # one sample of shape (3, 224, 224); keep it on the same device as the model
y = Myvggc(x)
myvgg_vis = make_dot(y, params=dict(list(Myvggc.named_parameters()) + [('x', x)]))
myvgg_vis

(Figure: torchviz computation graph of MyVggModel)

summary(Myvggc.cuda(), input_size=(3, 224, 224)) 
# Note: to use a different input image size, propagate the new size through the
# layers and update the first fully connected dimension (25088) accordingly --
# see the sketch after the summary output for a way to compute it automatically.
# Running summary first is a good way to confirm the structure is correct.
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256, 56, 56]               0
           Conv2d-15          [-1, 256, 56, 56]         590,080
             ReLU-16          [-1, 256, 56, 56]               0
        MaxPool2d-17          [-1, 256, 28, 28]               0
           Conv2d-18          [-1, 512, 28, 28]       1,180,160
             ReLU-19          [-1, 512, 28, 28]               0
           Conv2d-20          [-1, 512, 28, 28]       2,359,808
             ReLU-21          [-1, 512, 28, 28]               0
           Conv2d-22          [-1, 512, 28, 28]       2,359,808
             ReLU-23          [-1, 512, 28, 28]               0
        MaxPool2d-24          [-1, 512, 14, 14]               0
           Conv2d-25          [-1, 512, 14, 14]       2,359,808
             ReLU-26          [-1, 512, 14, 14]               0
           Conv2d-27          [-1, 512, 14, 14]       2,359,808
             ReLU-28          [-1, 512, 14, 14]               0
           Conv2d-29          [-1, 512, 14, 14]       2,359,808
             ReLU-30          [-1, 512, 14, 14]               0
        MaxPool2d-31            [-1, 512, 7, 7]               0
           Linear-32                  [-1, 512]      12,845,568
             ReLU-33                  [-1, 512]               0
          Dropout-34                  [-1, 512]               0
           Linear-35                  [-1, 256]         131,328
             ReLU-36                  [-1, 256]               0
          Dropout-37                  [-1, 256]               0
           Linear-38                   [-1, 10]           2,570
          Softmax-39                   [-1, 10]               0
================================================================
Total params: 27,694,154
Trainable params: 12,979,466
Non-trainable params: 14,714,688
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 218.41
Params size (MB): 105.64
Estimated Total Size (MB): 324.63
----------------------------------------------------------------
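Rather than recomputing 25088 (= 512 * 7 * 7) by hand whenever the input resolution changes, the flatten size can be probed with a dummy forward pass. A minimal sketch, reusing the frozen feature extractor and device from above:

# Sketch: derive the first Linear layer's input size for any resolution
with torch.no_grad():
    dummy = torch.zeros(1, 3, 224, 224).to(device) # replace 224 with the new input size
    flat_size = Myvggc.vgg(dummy).view(1, -1).size(1)
print(flat_size) # 25088 for a 224x224 input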
Preparing the data for the new network

The training set gets a random resized crop and a random horizontal flip.

The validation set gets a resize followed by a center crop.

Finally, both are normalized.

# Preprocessing for the training set of the 10-monkey-species data
train_data_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224), # random crop resized to 224*224
    transforms.RandomHorizontalFlip(), # horizontal flip with probability p=0.5
    transforms.ToTensor(), # convert to a tensor scaled to [0, 1]
    # Normalize with the ImageNet channel means and standard deviations
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
# Preprocessing for the validation set of the 10-monkey-species data
val_data_transforms = transforms.Compose([
    transforms.Resize(256), # resize the image
    transforms.CenterCrop(224), # crop to the given size around the center
    transforms.ToTensor(), # convert to a tensor scaled to [0, 1]
    # Normalize with the ImageNet channel means and standard deviations
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

The data-loading code is as follows:

# Load the training images
train_data_dir = './data/10-monkey-species/training/training'
# Wrap them with ImageFolder (note the nested training/training folder; each class sits in its own subfolder)
train_data = ImageFolder(train_data_dir, transform=train_data_transforms) # use the training transforms defined above
train_data_loader = Data.DataLoader(train_data, batch_size = 16, 
                                    shuffle = True, num_workers = 2)
# Load the validation images
val_data_dir = './data/10-monkey-species/validation/validation'
# Wrap them with ImageFolder
val_data = ImageFolder(val_data_dir, transform=val_data_transforms)
val_data_loader = Data.DataLoader(val_data, batch_size = 16, 
                                    shuffle = True, num_workers = 2)
print('Training samples:', len(train_data.targets))
print('Validation samples:', len(val_data.targets))
Training samples: 1097
Validation samples: 272
# Class-to-index mapping
print(train_data.class_to_idx)
print(val_data.class_to_idx)
{'n0': 0, 'n1': 1, 'n2': 2, 'n3': 3, 'n4': 4, 'n5': 5, 'n6': 6, 'n7': 7, 'n8': 8, 'n9': 9}
{'n0': 0, 'n1': 1, 'n2': 2, 'n3': 3, 'n4': 4, 'n5': 5, 'n6': 6, 'n7': 7, 'n8': 8, 'n9': 9}

Use matplotlib to visualize part of the training set:

# Grab one batch for visualization
for step, (b_x, b_y) in enumerate(train_data_loader):
    if step > 0:
        break
# Visualize the images in this training batch
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
plt.figure(figsize = (12, 6))
for ii in np.arange(len(b_y)):
    plt.subplot(4, 8, ii+1)
    img = b_x[ii, :, :, :].numpy().transpose((1, 2, 0))
    img = std * img + mean # undo the normalization for display
    img = np.clip(img, 0, 1)
    plt.imshow(img)
    plt.title(b_y[ii].data.numpy())
    plt.axis('off')
plt.subplots_adjust(hspace = 0.3)


(Figure: one batch of training images, titled with their class indices)

Training and evaluating the fine-tuned network

Use the Adam optimizer with cross-entropy loss, and visualize the training- and validation-set performance with the HiddenLayer library.

# Define the optimizer
optimizer = optim.Adam(Myvggc.parameters(), lr=0.0003)
# Loss function
loss_func = nn.CrossEntropyLoss().to(device)
# Record the metrics of the training process
historyl = hl.History()
# Visualize with a Canvas
canvasl = hl.Canvas()
# Train the model iteratively, passing over all the data in each epoch
for epoch in range(10):
    train_loss_epoch = 0
    val_loss_epoch = 0
    train_corrects = 0
    val_corrects = 0
    
    # Iterate over the training data loader
    Myvggc.train()
    for step, (b_x, b_y) in enumerate(train_data_loader):
        b_x, b_y = b_x.to(device), b_y.to(device)
        output = Myvggc(b_x)    # predictions for this batch
        loss = loss_func(output, b_y)    # loss between predictions and labels
        pre_lab = torch.argmax(output, 1)
        optimizer.zero_grad()    # clear the gradients
        loss.backward()    # backpropagate the loss
        optimizer.step()    # update the weights from the gradients
        train_loss_epoch += loss.item() * b_x.size(0)
        train_corrects += torch.sum(pre_lab == b_y.data)    # number of correct predictions
    # Loss and accuracy over this epoch
    train_loss = train_loss_epoch / len(train_data.targets)
    train_acc = train_corrects.double() / len(train_data.targets)
    
    # Evaluate the performance on the validation set
    Myvggc.eval()
    # if epoch % 5 == 0: # to speed up training, the validation set can be evaluated every 5 epochs instead of every epoch
    with torch.no_grad():    # no gradients are needed for evaluation
        for step, (val_x, val_y) in enumerate(val_data_loader):
            val_x, val_y = val_x.to(device), val_y.to(device)
            output = Myvggc(val_x)    # predictions for this batch
            loss = loss_func(output, val_y)    # loss between predictions and labels
            pre_lab = torch.argmax(output, 1)
            val_loss_epoch += loss.item() * val_x.size(0)
            val_corrects += torch.sum(pre_lab == val_y.data)    # number of correct predictions
    
    # Loss and accuracy over this epoch
    val_loss = val_loss_epoch / len(val_data.targets)
    val_acc = val_corrects.double() / len(val_data.targets)
    
    # Log the loss and accuracy of each epoch
    historyl.log(epoch, train_loss=train_loss,
                 val_loss = val_loss,
                 train_acc = train_acc.item(),
                 val_acc = val_acc.item()
                 )
    
    # Visualize the training progress
    with canvasl:
        canvasl.draw_plot([historyl['train_loss'], historyl['val_loss']])
        canvasl.draw_plot([historyl['train_acc'], historyl['val_acc']])
canvasl.save('iter_visual.png')


(Figure: training/validation loss and accuracy curves drawn by HiddenLayer)

The curves show how the loss and the recognition accuracy evolve over the course of training.
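The sklearn and seaborn imports at the top come in handy for a final evaluation pass. The following sketch collects predictions over the validation set and prints the accuracy, a per-class report, and a confusion-matrix heatmap; it reuses Myvggc, val_data_loader, and val_data from above:

# Sketch: evaluate the fine-tuned model on the validation set
Myvggc.eval()
all_pre, all_lab = [], []
with torch.no_grad():
    for val_x, val_y in val_data_loader:
        output = Myvggc(val_x.to(device))
        all_pre.append(torch.argmax(output, 1).cpu())
        all_lab.append(val_y)
all_pre = torch.cat(all_pre).numpy()
all_lab = torch.cat(all_lab).numpy()
print(accuracy_score(all_lab, all_pre))
print(classification_report(all_lab, all_pre))
sns.heatmap(confusion_matrix(all_lab, all_pre), annot=True, fmt='d',
            xticklabels=val_data.classes, yticklabels=val_data.classes)
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.show()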

# Save the model
model = Myvggc
torch.save(model, 'mycnn.pkl')  # pickles the entire model object
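Because torch.save(model, ...) pickles the entire model object, reloading takes a single call; note that the MyVggModel class must be defined (or importable) in the loading script. A minimal sketch:

# Sketch: reload the pickled model for inference
model = torch.load('mycnn.pkl', map_location=device)
model.eval() # disable dropout for inference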