VGG网络简介
VGGNet是牛津大学视觉几何组(Visual Geometry Group)提出的模型,该模型在2014ImageNet图像分类与定位挑战赛 ILSVRC-2014中取得在分类任务第二,定位任务第一的优异成绩。VGGNet突出的贡献是证明了很小的卷积,通过增加网络深度可以有效提高性能。
VGGNet网络5种结构详见
以VGG16-3网络为例
VGG16网络各层概述
神经网络微调即任务详解
深度卷积神经网络模型由于其层数多,需要训练的参数多,导致从零开始训练很深的卷积神经网络十分困难,利用预训练好的深度学习网络,针对自己的需求,对预训练好的深度神经网络进行微调,从而快速完成自己的任务。
本次任务是基于预训练好的VGG16网络,对其网络结构进行微调,使用自己的分类数据集,训练一个图像分类器使用来自kaggle的10类猴子数据集,包含训练集和验证数据集,针对该数据集使用VGG16的卷积层和池化层的预训练的权重,提取数据特征,然后定义新的全连接层,用于图像的分类。
实现VGG16微调网络的代码,并输出网络结构
#导入预训练模型
vgg16 = models.vgg16()
vgg16_parameters=torch.load('vgg16_model/vgg16.pth')
#vgg16_parameters= torch.load("vgg16_model\vgg16.pth")
vgg16.load_state_dict(vgg16_parameters)
vgg = vgg16.features # 获取vgg16的特征提取层(去除全连接层的卷积模块)
# requires_grad_(False)冻结vgg16的所有网络层参数,不更新
for param in vgg.parameters():
param.requires_grad_(False)
class MyNet(nn.Module):
def __init__(self):
super(MyNet,self).__init__()
# 预训练的VGG16网络的features layers:
self.vgg = vgg
#自定义全连接层:即改写的分层
self.classifier = nn.Sequential(
nn.Linear(25088, 2048),
nn.ReLU(inplace = True),
nn.Dropout(p = 0.5),
nn.Linear(2048, 512),
nn.ReLU(inplace = True),
nn.Dropout(p = 0.5),
nn.Linear(512, 10),
)
# 前向传播
def forward(self, x):
x = self.vgg(x)
x = x.view(x.size(0), -1)
output = self.classifier(x)
return output
输出网络结构:
Out[4]:
MyVggModel(
(vgg): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace=True)
(2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace=True)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(6): ReLU(inplace=True)
(7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(8): ReLU(inplace=True)
(9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace=True)
(12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): ReLU(inplace=True)
(14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(15): ReLU(inplace=True)
(16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(18): ReLU(inplace=True)
(19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(20): ReLU(inplace=True)
(21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(22): ReLU(inplace=True)
(23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(25): ReLU(inplace=True)
(26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(27): ReLU(inplace=True)
(28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(29): ReLU(inplace=True)
(30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(classifier): Sequential(
(0): Linear(in_features=25088, out_features=512, bias=True)
(1): ReLU()
(2): Dropout(p=0.5, inplace=False)
(3): Linear(in_features=512, out_features=256, bias=True)
(4): ReLU()
(5): Dropout(p=0.5, inplace=False)
(6): Linear(in_features=256, out_features=10, bias=True)
(7): Softmax(dim=1)
)
)
数据集准备
首先定义训练集和验证集的预处理过程,定义对训练集的预处理train_data_transforms,从而对训练集进行数据增强,对验证集的预处理过程val_data_transforms与train_data_transforms会有一些差异,不需要对图像进行随机翻转与随机裁剪操作。在对读习入的单张图像进行预处理时,通过RandomResizedCrop()对图像进行随机裁剪,使用RandomHorizontalFlip(0将图像依概率p=0.5水平翻转,通过Resize()重置图像分辨率,通过CenterCrop()将图像按照给定的尺寸从中心裁剪,通过Normalize()将图像的像素值进行标准化处理等。
因为每类图像都分别保存在一个单独的文件夹中,所以可以使用ImageFolder()函数从文件中读取训练集和验证集。
train_data_transforms=transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])])
val_data_transforms=transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])])
train_data_dir = "data/chap6/10-monkey-species/training"
train_data = ImageFolder(train_data_dir, transform=train_data_transforms)
train_data_loader = Data.DataLoader (train_data,batch_size=32,shuffle=True, num_workers=2)
val_data_dir = "data/chap6/10-monkey-species/validation"
val_data = ImageFolder(val_data_dir, transform=train_data_transforms)
val_data_loader = Data.DataLoader (train_data,batch_size=32,shuffle=True, num_workers=2)
网络训练及预测
使用训练集对网络进行训练,使用验证集验证。模型在训练时使用Adam优化算法,损失函数使用nn.CrossEntropyLoss()交叉嫡损失。在训练过程中使用HiddenLayer库可视化网络在训练集和验证集上的表现,并使用time模块对每个epoch进行时间统计
optimizer = torch.optim.Adam(Myvggc.parameters(), lr=0.0003)
loss_func = nn.CrossEntropyLoss()
history1 = hl.History()
canvas1 = hl.Canvas()
num_epochs=10
since = time.time() # 当前时间
for epoch in range(num_epochs):
train_loss_epoch = 0
val_loss_epoch = 0
train_corrects = 0
val_corrects = 0
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
Myvggc.train() # 自动开启batch normalize和dropout
for step, (b_x, b_y) in enumerate(train_data_loader):
output = Myvggc(b_x)
loss = loss_func(output, b_y)
pre_lab = torch.argmax(output, dim=1)
optimizer.zero_grad()
loss.backward()
optimizer.step()
train_loss_epoch += loss.item() * b_x.size(0)
train_corrects += torch.sum(pre_lab == b_y.data)
# 计算一个epoch的损失和精度
train_loss = train_loss_epoch / len(train_data.targets)
train_acc = train_corrects.double() / len(train_data.targets)
# 计算在验证集上的表现
Myvggc.eval() # 开启batch normalize, 关dropout
for step, (b_x, b_y) in enumerate(val_data_loader):
output = Myvggc(b_x)
loss = loss_func(output, b_y)
pre_lab = torch.argmax(output, dim=1)
val_loss_epoch += loss.item() * b_x.size(0)
val_corrects += torch.sum(pre_lab == b_y.data)
# 计算一个epoch的损失和精度
val_loss = val_loss_epoch / len(val_data.targets)
val_acc = val_corrects.double() / len(val_data.targets)
time_use = time.time() - since # 计算耗费时间
print('{} Train Loss: {:.4f} Train Acc: {:.4f}'.format(epoch,train_loss,train_acc))
print('{} Val Loss: {:.4f} val Acc: {:.4f}'.format(epoch,val_loss,val_acc))
print("Train and val complete in {:.0f}m {:.0f}s".format(time_use // 60, time_use % 60))
结果显示
Epoch 9/9
9 Train Loss: 1.5742 Train Acc: 0.8902
9 Val Loss: 1.4703 val Acc: 0.9963
Train and val complete in 43m 2s…