VGG
一、VGG简介
从图像中提取CNN特征时,VGG模型是常用的首选算法。VGG是牛津大学 Visual Geometry Group(视觉几何组)的缩写,模型以该研究机构命名。增加卷积神经网络的深度并使用小卷积核,对网络最终的分类识别效果有很大的提升,VGG几乎全部使用 3x3 小卷积核和 2x2 池化核。
二、VGG的详解
VGG共有六个配置,我们常用的是D这个配置,即13个卷积层和3个全连接层
堆叠两个3x3的卷积核替代5x5的卷积核,堆叠三个3x3的卷积核替代7x7的卷积核
相同点:替换前后具有相同的感受野
三、基础概念拓展-感受野
感受野是指CNN结构中某个特征映射到输入空间的区域大小。对于某一特征的感受野,可以通过它的中心位置和它的尺寸大小来描述。
计算公式:
当前层感受野 = 卷积步长 x(下层特征map感受野 - 1)+ 卷积核边长
(递推从目标特征开始,其感受野记为1,逐层向输入方向计算;式中的步长和卷积核边长取当前层的值,“下层”指更靠近输出的相邻一层。)
例如:
import math
# One entry per layer, given as [kernel size, stride, padding]; outFromIn
# reads the three values in exactly that order.
convnet = [
    [11, 4, 0],  # conv1
    [3, 2, 0],   # pool1
    [5, 1, 2],   # conv2
    [3, 2, 0],   # pool2
    [3, 1, 1],   # conv3
    [3, 1, 1],   # conv4
    [3, 1, 1],   # conv5
    [3, 2, 0],   # pool5
    [6, 1, 0],   # fc6-conv
    [1, 1, 0],   # fc7-conv
]

# Display names, index-aligned with ``convnet``.
layer_names = [
    'conv1',
    'pool1',
    'conv2',
    'pool2',
    'conv3',
    'conv4',
    'conv5',
    'pool5',
    'fc6-conv',
    'fc7-conv',
]

# Side length of the input image; used as the feature count of the data layer.
imsize = 227
def outFromIn(conv, layerIn):
    """Propagate the receptive-field bookkeeping through one layer.

    Args:
        conv: Layer description ``[kernel size, stride, padding]``.
        layerIn: State of the layer's input as
            ``(n features, jump, receptive size, start)``, where ``start`` is
            the centre coordinate of the first feature.

    Returns:
        A tuple ``(n_out, j_out, r_out, start_out)`` holding the same four
        quantities for the layer's output.
    """
    n_in, j_in, r_in, start_in = layerIn[:4]
    k, s, p = conv[:3]

    n_out = math.floor((n_in - k + 2 * p) / s) + 1
    # Total padding actually consumed by the n_out windows; it can be smaller
    # than 2*p when the stride does not divide the padded input evenly.
    actualP = (n_out - 1) * s - n_in + k
    # Only the left share of that padding shifts the first feature's centre.
    pL = math.floor(actualP / 2)

    j_out = j_in * s
    r_out = r_in + (k - 1) * j_in
    start_out = start_in + ((k - 1) / 2 - pL) * j_in
    return n_out, j_out, r_out, start_out
def printLayer(layer, layer_name):
    """Print a two-part summary of one layer to stdout.

    Args:
        layer: Indexable holding, in order, the feature count, jump,
            receptive size and start value of the layer.
        layer_name: Heading printed (followed by a colon) above the values.
    """
    heading = layer_name + ":"
    print(heading)
    template = "\t n features: %s \n \t jump: %s \n \t receptive size: %s \t start: %s "
    values = (layer[0], layer[1], layer[2], layer[3])
    print(template % values)
# Per-layer (n features, jump, receptive size, start) tuples, filled in by the
# script below in the same order as ``convnet``.
layerInfos = []
if __name__ == '__main__':
    # The first layer is the data layer (image) with n_0 = image size;
    # j_0 = 1; r_0 = 1; and start_0 = 0.5.
    print("-------Net summary------")
    currentLayer = [imsize, 1, 1, 0.5]
    printLayer(currentLayer, "input image")
    for layer_spec, name in zip(convnet, layer_names):
        currentLayer = outFromIn(layer_spec, currentLayer)
        layerInfos.append(currentLayer)
        printLayer(currentLayer, name)
    print("------------------------")

    layer_name = input("Layer name where the feature in: ").strip()
    if layer_name not in layer_names:
        raise ValueError(
            "unknown layer %r; expected one of: %s"
            % (layer_name, ", ".join(layer_names))
        )
    layer_idx = layer_names.index(layer_name)
    idx_x = int(input("index of the feature in x dimension (from 0)"))
    idx_y = int(input("index of the feature in y dimension (from 0)"))

    n, j, r, start = layerInfos[layer_idx]
    # Validate explicitly instead of with ``assert``: assertions are removed
    # under ``python -O`` and never rejected negative indices.
    for axis, idx in (("x", idx_x), ("y", idx_y)):
        if not 0 <= idx < n:
            raise ValueError(
                "%s index %d is out of range for layer %s (valid: 0..%d)"
                % (axis, idx, layer_name, n - 1)
            )

    print("receptive field: (%s, %s)" % (r, r))
    print("center: (%s, %s)" % (start+idx_x*j, start+idx_y*j))
四、搭建VGG
#准备
import torch
import torch.nn as nn
import torch
import torch.nn as nn
# Feature-extractor layouts keyed by model name. An integer adds a 3x3
# convolution with that many output channels; 'M' adds a 2x2 max-pooling
# layer. 'VGG11' follows configuration A of the VGG paper (8 convolutions);
# the previous entry listed only 5, dropping one convolution from each of the
# last three stages.
cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
#搭建模型
class VGG(nn.Module):
    """VGG-style classifier: a conv/pool feature stack plus one linear layer.

    The feature stack is built from the module-level ``cfg`` entry selected
    by ``vgg_name``. The classifier consumes exactly 512 flattened features,
    so the feature stack must reduce the input to 512 x 1 x 1 (for layouts
    ending in 512 channels with five 2x2 poolings this means 32x32 images).

    Args:
        vgg_name: Key into ``cfg`` naming the layer layout to build.
        num_classes: Number of output scores. Defaults to 10, the value that
            was previously hard-coded.

    Raises:
        KeyError: If ``vgg_name`` is not a key of ``cfg``.
    """

    def __init__(self, vgg_name, num_classes=10):
        # Initialise nn.Module first so sub-modules can be registered.
        super().__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)`` for ``x``."""
        out = self.features(x)
        # Keep the batch dimension and flatten everything else into one
        # feature axis: (batch, C, H, W) -> (batch, C*H*W).
        out = out.view(out.size(0), -1)
        return self.classifier(out)

    def _make_layers(self, cfg):
        """Translate a layout list into an ``nn.Sequential``.

        Args:
            cfg: Sequence in which ``'M'`` stands for a 2x2/stride-2 max
                pooling and any other value is the output channel count of a
                3x3 convolution followed by batch norm and ReLU.

        Returns:
            The assembled ``nn.Sequential``; the first convolution expects
            3 input channels.
        """
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            # The convolution has no bias: the BatchNorm that follows
            # carries its own learnable shift.
            layers.extend([
                nn.Conv2d(in_channels, x, kernel_size=3, padding=1, bias=False),
                nn.BatchNorm2d(x),
                nn.ReLU(inplace=True),
            ])
            in_channels = x
        return nn.Sequential(*layers)
#检验结果
def t():
    """Smoke test: run VGG19 on a random batch and print the output size."""
    model = VGG('VGG19')
    # Five random 3-channel 32x32 images.
    batch = torch.randn(5, 3, 32, 32)
    scores = model(batch)
    print(scores.size())


# Run the smoke test only when the file is executed as a script.
if __name__ == "__main__":
    t()