SSD网络,具体可阅读SSD相关论文:
SSD网络:
#-*-coding:utf-8-*-
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import os
base = {
'300' : [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
512, 512, 512]
}
def vgg(cfg, picture_channels, batch_norm=False):
layers = []
in_channels = picture_channels
for v in cfg:
if v == 'M': #Maxpooling 并且不进行边缘修补
layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
elif v == 'C': #边缘补nan
layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
else:#卷积前后维度可以通过字典中数据设置好
conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
if batch_norm:
layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
else:
layers += [conv2d, nn.ReLU(inplace=True)]
in_channels = v
pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
layers += [pool5, conv6, nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)]
return layers
vgg_model = vgg(base[str(300)], 3)
extras = {
'300' : [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256]
}
def add_extra(cfg, base_channel, batch_norm=False):
layers = []
in_channels = base_channel
flag = False
for k, v in enumerate(cfg):
if in_channels != 'S' : #s代表stride 为2时就相当于缩小feature map
if v == 'S':
layers += [nn.Conv2d(in_channels, cfg[k + 1],
kernel_size=(1, 3)[flag], stride=2, padding=1)]
else:
layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])]
flag = not flag
in_channels = v
return layers
exter_layer = add_extra(extras[str(300)], 1024)
print('-----------------extra-------------------')
for i in exter_layer:
print(i)
mbox = {
'300' : [4, 6, 6, 6, 4, 4]
}
def multibox(vgg, extra_layers, cfg, num_class):
loc_layers = []
conf_layers = []
vgg_source = [21, -2] #第21层和倒数第二层
for k, v in enumerate(vgg_source):
loc_layers += [nn.Conv2d(vgg[v].out_channels,
cfg[k] * 4, kernel_size=3, padding=1)]
conf_layers += [nn.Conv2d(vgg[v].out_channels,
cfg[k] * num_class, kernel_size=3, padding=1)]
for k, v in enumerate(extra_layers[1::2], 2):
loc_layers += [nn.Conv2d(v.out_channels,
cfg[k] * 4, kernel_size=3, padding=1)]
conf_layers += [nn.Conv2d(v.out_channels,
cfg[k] * num_class, kernel_size=3, padding=1)]
return vgg, extra_layers, (loc_layers, conf_layers)
base_, extras_, head_ = multibox(vgg_model, exter_layer, mbox[str(300)], 21)
print('--------------------------vgg---------------------')
for i in base_:
print(i)
print('-----------------extra-------------------')
for i in extras_:
print(i)
for i, j in enumerate(head_):
if i == 0:
print("--------predction : loc----------")
else:
print("--------predction : conf----------")
for k in j:
print(k)
class SSD(nn.Module):
"""
Single Shot Multibox Architecture
Args:
phase: 'train' or 'test'
size: input image size
base: vgg16
extras: ssd layer
head: loc and conf layer
"""
def __init__(self, phase, size, base, extras, head, num_classes):
super(SSD, self).__init__()
self.phase = phase
self.size = size
self.num_classes = num_classes
#SSD network
self.vgg = nn.ModuleList(base)
# self.L2Norm = L2Norm(512, 20)
self.extras = nn.ModuleList(extras)
self.loc = nn.ModuleList(head[0])
self.conf = nn.ModuleList(head[1])
def forward(self, x):
"""
Applies network layer and ops on input image(s) x
Args:
param x: input image or batch of images shape[batch, 3, 300, 300]
:return:
"""
sources = list()
loc = list()
conf = list()
#apply vgg up to conv4_3 relu
for k in range(23):
x = self.vgg[k](x) #得到[1, 512, 38, 38]
#s = self.L2Norm(x)
sources.append(x)
#apply vgg up to fc7
for k in range(23, len(self.vgg)):
x = self.vgg[k](x) #得到[1, 1024, 19, 19]
sources.append(x)
#apply extra layers and cache source layer oputputs
for k, v in enumerate(self.extras):
x = F.relu(v(x), inplace=True)
if(k % 2 == 1):
sources.append(x)
"""
得到剩下的
torch.Size([1, 512, 10, 10])
torch.Size([1, 256, 5, 5])
torch.Size([1, 256, 3, 3])
torch.Size([1, 256, 1, 1])
"""
#apply multibox head to source layers
for (x, l, c) in zip(sources, self.loc, self.conf):
loc.append(l(x).permute(0, 2, 3, 1).contiguous())
conf.append(c(x).permute(0, 2, 3, 1).contiguous())
FSSD网络(基于SSD),具体请阅读FSSD相关论文:
FSSD网络:
# -*- coding:utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
vgg_base = {
'300': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
512, 512, 512],
'512': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
512, 512, 512],
}
extras = {
'300': [256, 512, 128, 'S', 256],
'512': [256, 512, 128, 'S', 256],
}
fea_channels = {
'300': [512, 512, 256, 256, 256, 256],
'512': [512, 512, 256, 256, 256, 256, 256]}
size = 300
#调用:vgg(vgg_base[str(size)], 3)
def vgg(cfg, i, batch_norm=False):
layers = []
in_channels = i #输入图片是三通道的
for v in cfg:
if v == 'M':
layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
elif v == 'C':
layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
else:
conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
if batch_norm:
layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
else:
layers += [conv2d, nn.ReLU(inplace=True)]
in_channels = v
pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
layers += [pool5, conv6, nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)]
return layers
def add_extras(cfg, i, batch_norm=False):
layers = []
in_channels = i
flag = False
for k, v in enumerate(cfg):
if in_channels != 'S':
if v == 'S':
layers += [nn.Conv2d(in_channels, cfg[k + 1],
kernel_size=(1, 3)[flag], stride=2, padding=1)]
else:
layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])]
flag = not flag
in_channels = v
return layers
for layer in vgg(vgg_base[str(size)], 3):
print(layer)
print("-----------------extra--------------------")
for layer in add_extras(extras[str(size)], 1024):
print(layer)
def feature_transform_module(vgg, extral, size):
if size == 300:
up_size = 38
elif size == 512:
up_size = 64
layers = []
#conv4_3
layers += [BasicConv(vgg[24].out_channels, 256, kernel_size=1, padding=0)]
#fc_7
layers += [BasicConv(vgg[-2].out_channels, 256, kernel_size=1, padding=0, up_size=up_size)]
#feature 1
layers += [BasicConv(extral[-1].out_channels, 256, kernel_size=1, padding=0, up_size=up_size)]
return vgg, extral, layers
def multibox(fea_channels, cfg, num_classes):
loc_layers = []
conf_layers = []
assert len(fea_channels) == len(cfg)
for i, fea_channel in enumerate(fea_channels):
loc_layers += [nn.Conv2d(fea_channel, cfg[i] * 4, kernel_size=3, padding=1)]
conf_layers += [nn.Conv2d(fea_channel, cfg[i] * num_classes, kernel_size=3, padding=1)]
return (loc_layers, conf_layers)
def pyramid_feature_extractor(size):
layers = []
if size == 300:
layers = [#级联concat 38 ×38 × 768-->> 38 × 38 × 512
BasicConv(256 * 3, 512, kernel_size=3, stride=1, padding=1),
#19
BasicConv(512, 512, kernel_size=3, stride=2, padding=1),
#10
BasicConv(512, 256, kernel_size=3, stride=2, padding=1),
#5
BasicConv(512, 256, kernel_size=3, stride=2, padding=1),
#3
BasicConv(512, 256, kernel_size=3, stride=2, padding=1),
#1
BasicConv(512, 256, kernel_size=3, stride=2, padding=1)]
return layers
class BasicConv(nn.Module):
def __init__(self, in_planes, out_planes, kernel_size, \
stride=1, padding=0, dilation=1, groups=1, relu=True,\
bn=False, bias=True, up_size=0):
super(BasicConv, self).__init__()
self.out_channels = out_planes
self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride,
padding=padding, dilation=dilation, groups=groups, bias=bias)
self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None
self.relu = nn.ReLU(inplace=True) if relu else None
self.up_size = up_size
self.up_sample = nn.Upsample(size=(up_size, up_size), mode='bilinear') if up_size != 0 else None
def forward(self, x):
x = self.conv(x)
if self.bn is not None:
x = self.bn(x)
if self.relu is not None:
x = self.relu(x)
if self.up_size > 0:
x = self.up_sample(x)
return x
class FSSD(nn.Module):
"""
Args:
base: VGG16 layer for input, size of either 300 or 500
extras: extra layers that feed to multibox loc and conf layers
head: "multibox head" consists of loc and conf conv layers
"""
def __init__(self, base, extras, ft_module, pyramid_ext, head, num_classes, size):
super(FSSD, self).__init__()
self.num_classes = num_classes
#TODO:
self.size = size
#SSD
self.base = nn.ModuleList(base)
self.extras = nn.ModuleList(extras)
self.ft_module = nn.ModuleList(ft_module) #需要级联的层
self.pyramid_ext = nn.ModuleList(pyramid_ext)
self.fea_bn = nn.BatchNorm2d(256 * len(self.ft_module), affine=True)
self.loc = nn.ModuleList(head[0])
self.conf = nn.ModuleList(head[1])
self.softmax = nn.Softmax()
def forward(self, x, test=False):
"""Applies network layers and ops on input image(s) x
Args:
:param x: input image or batch of images. shape[batch, batch * 3, 300, 300]
:param test: phase
:return:
Depanding on phase:
test:
Variable(tensor) of output class label predictions
confidence score, and corresponding location predictions for
each object detected. Shape: [batch, topk, 7]
train:
list of concat outputs from::
1: confidence layers, Shape: [batch * num_priors, num_classes]
2: localization layers, Shape: [batch, num_priors * 4]
3: priorbox layers, Shape: [2, num_priors * 4]
"""
source_features = list()
transformed_teatures = list()
loc = list()
conf = list()
#conv4_3之前 vgg[24]是conv4_3
#apply vgg up to conv4_3 relu
for k in range(23):
x = self.base[k](x)
#conv4_3
source_features.append(x)
for k in range(23, len(self.base)):
x = self.base[k](x)
# fc7
source_features.append(x)
#apply extra layers and cache source layer outputs
for k, v in enumerate(self.extras):
x = F.relu(v(x), inplace=True)
source_features.append(x)
assert len(self.ft_module) == len(source_features) #需要级联
for k, v in enumerate(self.ft_module):
transformed_teatures.append(v(source_features[k])) #级联之前的卷积 三个特征曾分别输入变量x进行卷积操作
concat_fea = torch.cat(transformed_teatures, 1) #channels
x = self.fea_bn(concat_fea)
pyramid_fea = list()
for k, v in enumerate(self.pyramid_ext):
x = v(x)
pyramid_fea.append(x) #完成级联后的新的金字塔特征
#apply multibox head to source layers
for(x, l, c) in zip(pyramid_fea, self.loc, self.conf):
loc.append(l(x).permute(0, 2, 3, 1).contiguous()) #contiguous后可以view 指向临近指针 否则报错
conf.append(c(x).permute(0, 2, 3, 1).contiguous())
loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1) #view 将数组变为按照-1维度的数组, -1按照原参数的列数自行推断
conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
if test:
output = (
loc.view(loc.size(0), -1, 4), #loc preds
self.softmax(conf.view(-1, self.num_classes))) #conf preds
else:
output = (
loc.view(loc.size(0), -1, 4),
conf.view(conf.size(0), -1, self.num_classes))
return output
#调用:add_extras(extras[str(size)], 1024)