def multibox(vgg, extra_layers, cfg, num_classes):
    """Create the localization and confidence heads for each source layer.

    For every selected source layer two 3x3 convolutions (padding 1) are
    built whose input width is that layer's ``out_channels``: one with
    ``cfg[k] * 4`` output channels (box coordinates) and one with
    ``cfg[k] * num_classes`` output channels (class scores).

    Args:
        vgg: Indexable sequence of backbone layers; the entries at index 21
            and index -2 must expose ``out_channels``.
        extra_layers: Sequence of extra layers; every second entry starting
            at index 1 is used as a source and must expose ``out_channels``.
        cfg: Sequence indexed by source position (0 and 1 for the backbone
            sources, 2 onwards for the extra sources) giving the number of
            boxes predicted per location.
        num_classes: Number of class scores predicted per box.

    Returns:
        A tuple ``(vgg, extra_layers, (loc_layers, conf_layers))`` where the
        first two items are the unchanged inputs and the last holds the two
        lists of newly created convolutions, in source order.
    """
    loc_layers, conf_layers = [], []

    def _add_heads(in_channels, boxes_per_location):
        # The loc head is created before the conf head so the order in which
        # modules are instantiated stays the same for every source.
        loc_layers.append(
            nn.Conv2d(in_channels, boxes_per_location * 4,
                      kernel_size=3, padding=1))
        conf_layers.append(
            nn.Conv2d(in_channels, boxes_per_location * num_classes,
                      kernel_size=3, padding=1))

    # Positions inside ``vgg`` of the two backbone layers used as sources.
    vgg_source = [21, -2]
    for source_idx, layer_pos in enumerate(vgg_source):
        print("enumerate(vgg_source): ", source_idx, layer_pos)
        _add_heads(vgg[layer_pos].out_channels, cfg[source_idx])

    # Extra sources continue the cfg numbering right after the two backbone
    # sources, hence the enumeration starts at 2.
    for source_idx, extra in enumerate(extra_layers[1::2], 2):
        _add_heads(extra.out_channels, cfg[source_idx])

    print("multibox cfg: ",cfg)
    # Optional debug dumps of the inputs and the assembled head lists:
    # print("vgg: ",vgg)
    # print("extra_layers: ", extra_layers)
    # print("loc_layers: ", loc_layers)
    # print("conf_layers: ", conf_layers)
    return vgg, extra_layers, (loc_layers, conf_layers)
这段代码是用来定义SSD模型中的多尺度预测层的,也就是在VGG主干网络和额外层之后添加的一些卷积层,用来预测每个位置的边界框和类别置信度。这些预测层的结构和参数由cfg参数指定,其中每个元素表示每个位置对应的边界框的数量。这段代码的功能是根据cfg参数,分别在VGG网络和额外层中选取一些特征图,然后在每个特征图上添加两个卷积层,一个用来预测边界框的坐标,一个用来预测边界框的类别置信度,并将它们存储在两个列表loc_layers和conf_layers中,最后返回这两个列表以及VGG网络和额外层。具体来说:
- 定义两个空列表loc_layers和conf_layers,用来存储边界框预测层和类别置信度预测层
- 定义一个列表vgg_source,表示从VGG网络中选取的特征图在层列表中的索引(从0开始计数),分别是索引21(conv4_3的卷积层)和索引-2,即倒数第二层(fc7对应的卷积层)
- 对vgg_source进行遍历,每次取出一个索引v
- 在VGG网络的第v层后面添加一个卷积层,其输出通道数为cfg中对应位置的元素乘以4(因为每个边界框需要4个坐标),其卷积核大小为3x3,其填充为1,将这个卷积层加入到loc_layers列表中
- 在VGG网络的第v层后面再添加一个卷积层,其输出通道数为cfg中对应位置的元素乘以num_classes(因为每个边界框需要num_classes个类别置信度),其卷积核大小为3x3,其填充为1,将这个卷积层加入到conf_layers列表中
- 对额外层进行遍历,从第二个开始每隔一个取出一个卷积层v(即索引1、3、5、7处的3x3卷积层;它们之间的1x1卷积层只用于降低通道数,其输出不用于预测),同时k从2开始计数,以便接着VGG的两个特征图继续索引cfg
- 在额外层的第v层后面添加一个卷积层,其输出通道数为cfg中对应位置的元素乘以4(因为每个边界框需要4个坐标),其卷积核大小为3x3,其填充为1,将这个卷积层加入到loc_layers列表中
- 在额外层的第v层后面再添加一个卷积层,其输出通道数为cfg中对应位置的元素乘以num_classes(因为每个边界框需要num_classes个类别置信度),其卷积核大小为3x3,其填充为1,将这个卷积层加入到conf_layers列表中
这样,当遍历完VGG网络和额外层中选取的所有特征图后,就得到了一组多尺度预测层,其中每个特征图对应cfg中的一个元素。
输出断点测试
vgg:
[Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(inplace=True),
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(inplace=True),
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False), Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(inplace=True),
Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(inplace=True),
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False), Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(inplace=True),
Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(inplace=True),
Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(inplace=True),
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True), Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(inplace=True),
Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(inplace=True),
Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(inplace=True),
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False), Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(inplace=True),
Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(inplace=True),
Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(inplace=True),
MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False), Conv2d(512, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(6, 6), dilation=(6, 6)),
ReLU(inplace=True),
Conv2d(1024, 1024, kernel_size=(1, 1), stride=(1, 1)),
ReLU(inplace=True)]
extra_layers:
[Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1)),
Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)), Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1)),
Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)), Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1)),
Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1)),
Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1)),
Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))]
loc_layers:
[Conv2d(512, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
Conv2d(1024, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
Conv2d(512, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
Conv2d(256, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
Conv2d(256, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
Conv2d(256, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))]
conf_layers:
[Conv2d(512, 84, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
Conv2d(1024, 126, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
Conv2d(512, 126, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
Conv2d(256, 126, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
Conv2d(256, 84, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
Conv2d(256, 84, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))]