VGG简介
VGG是由Simonyan和Zisserman在论文《Very Deep Convolutional Networks for Large-Scale Image Recognition》中提出的卷积神经网络模型,其名称来源于作者所在的牛津大学视觉几何组(Visual Geometry Group)的缩写。
该模型参加了2014年的ImageNet大规模视觉识别挑战赛(ILSVRC 2014),取得了优异成绩:在分类任务上排名第二,在定位任务上排名第一。
结构
用PyTorch实现VGG-16
import torch
from torch import nn
import torch.nn.functional as F
class vgg(nn.Module):
    """VGG-16 style classifier written out layer by layer.

    The network has 13 convolution layers in five blocks (each block ends
    with a 2x2 max-pool) followed by three fully connected layers. The
    classifier head is sized for 3x32x32 inputs (e.g. CIFAR-sized images):
    five poolings reduce 32x32 to 1x1, so the flattened feature vector has
    512 elements.

    The third convolution of blocks 3-5 uses a 1x1 kernel, which matches
    configuration C of the VGG paper (configuration D uses 3x3 there).

    Args:
        num_classes: Number of output classes (size of the logits vector).
    """

    def __init__(self, num_classes):
        super(vgg, self).__init__()
        self.num_classes = num_classes
        # Shape comments below are channels*height*width for a 3*32*32 input.
        # block1
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1)  # 64*32*32
        self.conv2 = nn.Conv2d(64, 64, 3, 1, 1)  # 64*32*32
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # 64*16*16
        # block2
        self.conv3 = nn.Conv2d(64, 128, 3, 1, 1)  # 128*16*16
        # conv4 consumes conv3's 128-channel output, so in_channels must be 128.
        self.conv4 = nn.Conv2d(128, 128, 3, 1, 1)  # 128*16*16
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # 128*8*8
        # block3
        self.conv5 = nn.Conv2d(128, 256, 3, 1, 1)  # 256*8*8
        self.conv6 = nn.Conv2d(256, 256, 3, 1, 1)  # 256*8*8
        self.conv7 = nn.Conv2d(256, 256, 1, 1, 0)  # 256*8*8
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)  # 256*4*4
        # block4
        self.conv8 = nn.Conv2d(256, 512, 3, 1, 1)  # 512*4*4
        self.conv9 = nn.Conv2d(512, 512, 3, 1, 1)  # 512*4*4
        self.conv10 = nn.Conv2d(512, 512, 1, 1, 0)  # 512*4*4
        self.maxpool4 = nn.MaxPool2d(kernel_size=2, stride=2)  # 512*2*2
        # block5
        self.conv11 = nn.Conv2d(512, 512, 3, 1, 1)  # 512*2*2
        self.conv12 = nn.Conv2d(512, 512, 3, 1, 1)  # 512*2*2
        self.conv13 = nn.Conv2d(512, 512, 1, 1, 0)  # 512*2*2
        self.maxpool5 = nn.MaxPool2d(kernel_size=2, stride=2)  # 512*1*1
        # FC
        self.fc1 = nn.Linear(in_features=512 * 1 * 1, out_features=4096)
        self.fc2 = nn.Linear(in_features=4096, out_features=4096)
        # The last layer produces one logit per class.
        self.fc3 = nn.Linear(in_features=4096, out_features=num_classes)
        # Dropout applied after the first two FC layers (used by forward()).
        self.drop = nn.Dropout(p=0.5)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Input tensor of shape (batch, 3, 32, 32).

        Returns:
            Tensor of shape (batch, num_classes) with unnormalized logits.
        """
        # block1
        x = F.relu(self.conv1(x))
        x = self.maxpool1(F.relu(self.conv2(x)))
        # block2
        x = F.relu(self.conv3(x))
        x = self.maxpool2(F.relu(self.conv4(x)))
        # block3
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = self.maxpool3(F.relu(self.conv7(x)))
        # block4
        x = F.relu(self.conv8(x))
        x = F.relu(self.conv9(x))
        x = self.maxpool4(F.relu(self.conv10(x)))
        # block5
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = self.maxpool5(F.relu(self.conv13(x)))
        # FC: flatten per sample, keeping the batch dimension first.
        x = x.view(x.size(0), -1)
        x = self.drop(F.relu(self.fc1(x)))
        x = self.drop(F.relu(self.fc2(x)))
        x = self.fc3(x)
        return x
使用make_layers构建block
class vgg_make_layers(nn.Module):
    """VGG-16 style classifier whose conv blocks are generated by a helper.

    Five blocks of 3x3 convolutions (2, 2, 3, 3, 3 layers with 64, 128, 256,
    512, 512 channels), each followed by a 2x2 max-pool, then a three-layer
    fully connected classifier. The classifier is sized for 3x32x32 inputs:
    five poolings reduce 32x32 to 1x1, leaving 512 features per sample.

    Args:
        num_classes: Number of output classes (size of the logits vector).
    """

    def __init__(self, num_classes):
        super(vgg_make_layers, self).__init__()
        self.in_channels = 3  # initial number of input channels (RGB)
        self.num_classes = num_classes
        # Feature extractor; shape comments are channels*height*width
        # for a 3*32*32 input.
        self.conv3_64 = self.__make_layers(64, 2)
        self.maxpool1 = nn.MaxPool2d(2, 2)  # 64*16*16
        self.conv64_128 = self.__make_layers(128, 2)
        self.maxpool2 = nn.MaxPool2d(2, 2)  # 128*8*8
        self.conv128_256 = self.__make_layers(256, 3)
        self.maxpool3 = nn.MaxPool2d(2, 2)  # 256*4*4
        self.conv256_512 = self.__make_layers(512, 3)
        self.maxpool4 = nn.MaxPool2d(2, 2)  # 512*2*2
        self.conv512_512 = self.__make_layers(512, 3)
        self.maxpool5 = nn.MaxPool2d(2, 2)  # 512*1*1
        # classifier
        self.fc = nn.Sequential(
            nn.Linear(512 * 1 * 1, 4096),
            nn.ReLU(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, self.num_classes),
        )

    def __make_layers(self, channels, num):
        """Build `num` consecutive 3x3 conv + ReLU layers.

        The first conv takes ``self.in_channels`` inputs; every conv outputs
        ``channels``. ``self.in_channels`` is updated as layers are added so
        the next call continues from this block's output width.

        Args:
            channels: Output channels of every conv in the block.
            num: Number of conv + ReLU pairs to create.

        Returns:
            An ``nn.Sequential`` holding the generated layers.
        """
        layers = []
        for _ in range(num):
            layers.append(nn.Conv2d(self.in_channels, channels, 3, 1, 1))
            layers.append(nn.ReLU())
            # This layer's output width is the next layer's input width.
            self.in_channels = channels
        return nn.Sequential(*layers)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Input tensor of shape (batch, 3, 32, 32).

        Returns:
            Tensor of shape (batch, num_classes) with unnormalized logits.
        """
        # block1
        out = self.conv3_64(x)
        out = self.maxpool1(out)
        # block2
        out = self.conv64_128(out)
        out = self.maxpool2(out)
        # block3
        out = self.conv128_256(out)
        out = self.maxpool3(out)
        # block4
        out = self.conv256_512(out)
        out = self.maxpool4(out)
        # block5
        out = self.conv512_512(out)
        out = self.maxpool5(out)
        # FC: flatten per sample, keeping the batch dimension first.
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out
if __name__ == "__main__":
    # Smoke run: build the 10-class make_layers variant, print its layer
    # structure, then push one random 3x32x32 image through the network.
    net = vgg_make_layers(num_classes=10)
    print(net)
    x = torch.rand(1, 3, 32, 32)
    x_out = net(x)