CV Experiment: The VGG16 Network

This article introduces the VGG16 neural network model, including its stacked convolutional layers and fully connected layers. The model is fine-tuned by adding batch normalization layers, replacing the fully connected head, and appending an average pooling layer. It is trained on the CIFAR-10 dataset with data augmentation, a stochastic gradient descent optimizer, and a cosine annealing learning rate scheduler, reaching a best test accuracy of 91.33%.

1. Principle

VGG16 is a deep neural network built from stacked convolutional and fully connected layers: 16 weight layers in total, of which 13 are convolutional and 3 are fully connected. VGG16 popularized the idea of performing all convolutions with very small 3x3 kernels. This design choice allows the network to be made much deeper, increasing its representational capacity, and by repeatedly applying several 3x3 convolutional layers before each pooling layer, VGG16 gains more nonlinear transformations and more abstract representations. Its network architecture is shown in the figure below.
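Concretely, two stacked 3x3 convolutions cover the same 5x5 receptive field as a single 5x5 convolution, but with fewer parameters and an extra nonlinearity in between. A minimal sketch of the comparison (the channel count of 64 is an arbitrary example):

import torch.nn as nn

C = 64  # example channel count

# one 5x5 convolution vs. two stacked 3x3 convolutions: same 5x5 receptive field
conv5 = nn.Conv2d(C, C, kernel_size=5, padding=2)
stack3 = nn.Sequential(
    nn.Conv2d(C, C, kernel_size=3, padding=1), nn.ReLU(inplace=True),
    nn.Conv2d(C, C, kernel_size=3, padding=1), nn.ReLU(inplace=True),
)

def count(m):
    return sum(p.numel() for p in m.parameters())

print(count(conv5))   # 25*C*C + C    = 102464
print(count(stack3))  # 2*(9*C*C + C) = 73856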

2. Training and Testing

(1) Model fine-tuning:

1. A batch normalization layer, nn.BatchNorm2d(x), is added after every convolutional layer. Batch normalization mitigates internal covariate shift during training, which speeds up convergence and improves the model's stability and accuracy. By normalizing each channel's features to zero mean and unit variance, it gives all channels a similar distribution, which benefits training and generalization.
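As a quick illustration of the per-channel normalization (a minimal sketch; the tensor shape and the shift/scale are arbitrary example values):

import torch
import torch.nn as nn

x = torch.randn(8, 64, 32, 32) * 3 + 5  # feature maps far from zero mean / unit variance
bn = nn.BatchNorm2d(64)
bn.train()  # use batch statistics, as during training

y = bn(x)
# each channel is normalized over the batch and spatial dimensions (N, H, W)
print(y.mean(dim=(0, 2, 3))[:3])  # approximately 0
print(y.std(dim=(0, 2, 3))[:3])   # approximately 1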

2. The final three fully connected layers are replaced by a single fully connected layer (a parameter comparison is sketched after item 3).

3. An average pooling layer, nn.AvgPool2d(kernel_size=1, stride=1), is appended after the last convolutional block. Average pooling is normally used to shrink the spatial dimensions of the feature map, reducing parameters and computation while extracting a higher-level representation; note, however, that with kernel_size=1 and stride=1 this particular layer does not change the spatial size at all. On 32x32 CIFAR-10 inputs, the five max pooling stages have already reduced the feature map to 1x1x512, so it acts as a pass-through before the classifier.
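The shape bookkeeping behind fine-tuning steps 2 and 3 can be checked directly: five 2x2 max pooling stages reduce a 32x32 input to 1x1 (32 -> 16 -> 8 -> 4 -> 2 -> 1) with 512 channels, so a single Linear(512, 10) suffices as the classifier, whereas the original VGG16 head (designed for 224x224 ImageNet inputs, where the features flatten to 512*7*7) uses three large fully connected layers. A sketch of the parameter comparison (the original head below follows the standard VGG16 layer sizes, with dropout omitted since it adds no parameters):

import torch.nn as nn

# fine-tuned head: the 1x1x512 feature map flattens to 512 values
single_fc = nn.Linear(512, 10)

# original VGG16 head for 224x224 inputs (512*7*7 = 25088 flattened features)
original_head = nn.Sequential(
    nn.Linear(512 * 7 * 7, 4096), nn.ReLU(inplace=True),
    nn.Linear(4096, 4096), nn.ReLU(inplace=True),
    nn.Linear(4096, 1000),
)

def count(m):
    return sum(p.numel() for p in m.parameters())

print(count(single_fc))      # 5,130 parameters
print(count(original_head))  # about 123.6 million parameters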

Structure of the original VGG16 model:

Structure of fine-tuned VGG16 model, variant 1:

Structure of fine-tuned VGG16 model, variant 2:

(2) Parameter settings:

1. Data augmentation. The main operations are random cropping with pixel padding, random horizontal flipping, and normalization. Augmentation should be kept moderate; piling on too many operations can hurt rather than help.

transform_train = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
     ])

transform_test = transforms.Compose(
    [
        transforms.ToTensor(),
        # normalize with the same CIFAR-10 statistics as the training transform
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
     ])
2. Dataset preparation. batch_size is set to 128.
trainset = torchvision.datasets.CIFAR10(root=r'D:\CV\pytorch\dataset\cifar-10-batches-py',
                                        train=True, download=True, transform=transform_train)
trainLoader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)

testset = torchvision.datasets.CIFAR10(root=r'D:\CV\pytorch\dataset\cifar-10-batches-py',
                                       train=False, download=True, transform=transform_test)
testLoader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False)

3. Loss function and optimizer.

The loss function is the cross-entropy loss, and the optimizer is stochastic gradient descent, optim.SGD(), with an initial learning rate of 0.01, momentum=0.9, and a weight decay of 5e-4. A cosine annealing learning rate scheduler gradually lowers the learning rate over the course of training, which helps the model converge to a better optimum.

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)  # 0.1,0.01
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)
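Cosine annealing decays the learning rate as lr_t = eta_min + 0.5 * (lr_0 - eta_min) * (1 + cos(t * pi / T_max)). Note that with T_max=200 but only 100 training epochs, the schedule traverses only the first half of the cosine, so the learning rate ends near 0.005 rather than near 0. A quick way to inspect the schedule (a sketch; the dummy parameter exists only so the optimizer has something to hold):

import torch
import torch.optim as optim

dummy = [torch.zeros(1, requires_grad=True)]  # placeholder parameter
opt = optim.SGD(dummy, lr=0.01, momentum=0.9)
sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=200)

for epoch in range(200):
    if epoch % 50 == 0:
        print(epoch, round(sched.get_last_lr()[0], 6))
    opt.step()
    sched.step()
# prints roughly: 0 0.01, 50 0.008536, 100 0.005, 150 0.001464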

The full code is shown below:

models.py

'''VGG11/13/16/19 in Pytorch.'''
import torch
import torch.nn as nn


cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)


def test():
    net = VGG('VGG11')
    x = torch.randn(2,3,32,32)
    y = net(x)
    print(y.size())

main.py

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter
from models import *


transform_train = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
     ])

transform_test = transforms.Compose(
    [
        transforms.ToTensor(),
        # normalize with the same CIFAR-10 statistics as the training transform
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
     ])

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

trainset = torchvision.datasets.CIFAR10(root=r'D:\CV\pytorch\dataset\cifar-10-batches-py',
                                        train=True, download=True, transform=transform_train)
trainLoader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)

testset = torchvision.datasets.CIFAR10(root=r'D:\CV\pytorch\dataset\cifar-10-batches-py',
                                       train=False, download=True, transform=transform_test)
testLoader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False)

writer = SummaryWriter(r'D:\CV\pytorch\pytorch-cifar-master\logs_vgg16_100ep')

net = VGG('VGG16')

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)  # 0.1,0.01
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

total_times = 100
accuracy_rate = []


net.to(device)

for epoch in range(total_times):
    net.train()
    running_loss = 0.0
    total_train_correct = 0
    total_train_samples = 0

    for i, (data, labels) in enumerate(trainLoader, 0):
        data = data.to(device)
        labels = labels.to(device)
        outputs = net(data)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()  # accumulate the loss once per batch
        _, pred = outputs.max(1)
        total_train_correct += (pred == labels).sum().item()
        total_train_samples += data.shape[0]

    train_loss = running_loss / len(trainLoader)
    train_accuracy = total_train_correct / total_train_samples

    writer.add_scalar('Train/Loss', train_loss, epoch)
    writer.add_scalar('Train/Accuracy', train_accuracy, epoch)
    print('epoch[%d] train_loss: %.4f, train_acc: %.4f' % (epoch + 1, train_loss, train_accuracy))

    net.eval()
    correct = 0  # number of correctly classified test images
    total = 0    # total number of test images
    losses = []  # per-batch test losses
    with torch.no_grad():
        for data in testLoader:
            images, labels = data
            images = images.to(device)
            outputs = net(images).cpu()
            _, predicted = torch.max(outputs, 1)  # index of the largest logit is the predicted class
            total += labels.size(0)
            correct += (predicted == labels).sum().item()  # count correct predictions in this batch
            loss = criterion(outputs, labels)  # test loss for this batch
            losses.append(loss.item())

    accuracy = 100 * correct / total
    accuracy_rate.append(accuracy)
    mean_loss = sum(losses) / len(losses)  # average test loss over all batches

    writer.add_scalar('Test/Loss', mean_loss, epoch)
    writer.add_scalar('Test/Accuracy', accuracy, epoch)

    print(f'epoch[{epoch + 1}]  test_loss: {mean_loss:.4f}  test_acc: {accuracy:.2f}%')
    scheduler.step()
writer.close()

torch.save(net.state_dict(), r'D:\CV\pytorch\pytorch-cifar-master/res/VGG16_100epoch.pth')
accuracy_rate = np.array(accuracy_rate)
times = np.linspace(1, total_times, total_times)
plt.xlabel('epoch')
plt.ylabel('test accuracy (%)')
plt.plot(times, accuracy_rate)
plt.show()

print(accuracy_rate)

(3) Training and test results and summary

We trained the fine-tuned VGG16 model on the CIFAR-10 dataset for 100 epochs and reached a best test accuracy of 91.33%, a substantial improvement over the original VGG16 model. Comparing and analyzing the fine-tuned variants leads to the conclusions below.

The figure below shows the output of a run in PyCharm. Because of limited compute, this particular run used a batch_size of 64, although 128 or 256 is generally preferable.
