1、SSD第一部分的网络
(1)参考了
python ssd目标检测_目标检测之SSD算法实现(Pytorch)_weixin_39805734的博客-CSDN博客
1.1第一部分的网络具体结构
1)针对第(9)个教程,上一个教程的图1,SSD第一部分网络所指的如下
1.2 第一部分网络对应VGG的结构
1.3 Conv6使用了空洞卷积
conv6从512变为1024使用了空洞卷积
nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6),
1.3.1空洞卷积原理
(1) FC6层的输入[b,512,19,19]
(2)使用卷积核3*3,空洞卷积dilation=6后等效卷积核大小为 3 + (3-1)×(6-1) = 13,即13*13
1)空洞卷积的计算
2)自己画图试试看
(3)19*19,padding=6后的图大小为31*31
(4)运算后图的大小为19*19
(19-13+2*6)/1 +1 = 19
1.4Conv7使用了普通卷积
Conv7从1024 变为1024,使用了普通卷积
nn.Conv2d(1024, 1024, kernel_size=1),
2、SSD第一部分的网络的代码实现
注意Conv4和Conv5堆叠的通道数(高度)都是512,因而cfg列表的最后两组均为三个512。
2.1 conv1到conv5的结构
conv1 => conv5的网络结构如下:
cfg = base['300'] = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', 512, 512, 512]
2.2 两种实现方法
第一部分网络的两种实现方法
# import torch
from torch import nn
# ssd.py
'''
cfg的方法
(1)方法一:参考来源: https://blog.csdn.net/weixin_39805734/article/details/110558260。已修改它的bug,陈20221013
(2)方法二:其它作者的写法,将base['300']中的所有maxpooling也就是M都写成一样,
不区分C,然后全部使用ceil_mode=True,该方法不推荐!陈20221013
'''
def vgg_backbone_ssd(cfg, i, batch_norm = False):
# cfg = base['300'] = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', 512, 512, 512],
# i = 3
layers = []
in_channels = i
for v in cfg:
if v == 'M':
# print("find M")
layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
elif v == 'C':
layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
else:
# print(v)
conv2d = nn.Conv2d(in_channels=in_channels, out_channels=v, kernel_size=3, padding=1)
if batch_norm:
layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
else:
layers += [conv2d, nn.ReLU(inplace=True)]
in_channels = v
# print("in_channels",in_channels)
pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
layers += [pool5, conv6, nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)]
return layers
'''
堆叠实现,待修改,陈20221013
'''
def chen_vgg_backbone_ssd():
    """Hand-stacked version of the SSD VGG backbone (conv1 .. fc7).

    Returns:
        nn.ModuleList of 7 nn.Sequential stages that together map
        [b,3,300,300] -> [b,1024,19,19], mirroring vgg_backbone_ssd.

    Fixes vs. the earlier draft (which was marked "待修改"/to-be-fixed):
    - conv4/conv5 had wrong channel counts (256/512 mix-ups) and missing
      padding=1, so the stages could not even run;
    - fc6 and fc7 were missing the ReLU activations that the cfg-driven
      builder appends after conv6/conv7;
    - conv4's pool is a plain 'M' in cfg, so ceil_mode is not used there.
    """
    base = nn.ModuleList(
        [
            # conv1: 64, 64, 'M' — [b,3,300,300] -> [b,64,150,150]
            nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(64, 64, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ),
            # conv2: 128, 128, 'M' — [b,64,150,150] -> [b,128,75,75]
            nn.Sequential(
                nn.Conv2d(64, 128, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(128, 128, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ),
            # conv3: 256, 256, 256, 'C' — [b,128,75,75] -> [b,256,38,38]
            # ceil_mode=True turns the odd 75 into 38 instead of 37.
            nn.Sequential(
                nn.Conv2d(128, 256, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(256, 256, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(256, 256, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
            ),
            # conv4: 512, 512, 512, 'M' — [b,256,38,38] -> [b,512,19,19]
            nn.Sequential(
                nn.Conv2d(256, 512, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(512, 512, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(512, 512, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ),
            # conv5: 512, 512, 512 + pool5 — [b,512,19,19] -> [b,512,19,19]
            nn.Sequential(
                nn.Conv2d(512, 512, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(512, 512, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(512, 512, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                # pool5: k=3, s=1, p=1 keeps the 19x19 resolution
                nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            ),
            # fc6: dilated 3x3 conv — [b,512,19,19] -> [b,1024,19,19]
            nn.Sequential(
                nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6),
                nn.ReLU(inplace=True),
            ),
            # fc7: 1x1 conv — [b,1024,19,19] -> [b,1024,19,19]
            nn.Sequential(
                nn.Conv2d(1024, 1024, kernel_size=1),
                nn.ReLU(inplace=True),
            ),
        ]
    )
    return base
def main():
    """Smoke test: build the SSD backbone with both methods and print them."""
    # (1) layer spec for the cfg-driven builder: ints are conv output
    # channels, 'M' is plain max-pooling, 'C' is max-pooling with ceil_mode.
    base = {
        '300': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
                512, 512, 512],
        '512': [],  # placeholder for an SSD512 spec
    }
    cfg = base['300']
    # Method 1: cfg-driven flat layer list (3 input channels for RGB).
    # (the unused `i = 3` local of the draft has been removed)
    layers1 = vgg_backbone_ssd(cfg, 3)
    print(layers1)
    # Method 2: hand-stacked ModuleList of stages.
    layers2 = chen_vgg_backbone_ssd()
    print(layers2)


if __name__ == '__main__':
    main()