这里开始进行阅读CenterNet源码模型中的models\networks的部分的网络代码,从简单的 Resnet 模型开始,这里先给出卷积的输入输出的计算公式,以及反卷积的输入输出计算公式:
1 2维卷积操作(Conv2d):
nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)
参数解析:
in_channels: 输入数据的通道数,例如RGB图片的通道数为3;out_channels: 输出数据的通道数,根据模型需要调整;kernel_size: 卷积核大小,可以是int或tuple;kernel_size=2 表示卷积核大小为(2,2),kernel_size=(2,3) 表示卷积核大小为(2,3),即非正方形卷积核;
stride: 步长,默认为1,取值方式与kernel_size类似:stride=2 表示高度和宽度两个方向的步长均为2;stride=(2,3) 表示高度方向(上下)步长为2,宽度方向(左右)步长为3;
padding: 零填充
dilation:空洞卷积参数
groups: 组卷积的分组数,输入通道数和输出通道数都必须能被该参数整除。
假设输入图像大小为 (H,W),卷积核大小为 K,步长(stride)为 S,填充(padding)为 P,则经过该卷积后(dilation=1,除法向下取整),其输出大小的计算公式为:
1) H' = ⌊(H - K + 2P) / S⌋ + 1
2) W' = ⌊(W - K + 2P) / S⌋ + 1
2 反卷积操作(ConvTranspose2d):
其参数为ConvTranspose2d(in_channels,out_channels,kernel_size,stride=1,padding=0,output_padding=0, groups=1,bias=True,dilation=1)
参数解析:
in_channels(int) —输入信号的通道数
out_channels(int) ----卷积产生的通道数
kernel_size(int or tuple) —卷积核的大小
stride(int or tuple,optional) —卷积步长,即要将输入扩大的倍数
padding(int or tuple,optional) —填充参数;注意在反卷积中它的作用与普通卷积相反:实际对输入每条边补0的层数为 dilation × (kernel_size - 1) - padding,因此输出的高宽各减少 2 × padding
output_padding(int or tuple,optional) —在输出的高、宽方向各自单侧额外增加的大小(只用于确定输出形状,高宽各增加 output_padding),用于消除 stride > 1 时输出尺寸的歧义
groups(int ,optional) —从输入通道到输出通道的阻塞连接数
bias(bool, optional) —若bias=True,添加偏置
dilation(int or tuple,optional)—卷积核元素之间的间距
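其中,假设输入图像大小为 (H,W),卷积核大小为 K,步长(stride)为 S,填充(padding)为 P,则经过该反卷积后(dilation=1),其输出大小的计算公式为:
H' = (H - 1) × S - 2P + K + output_padding,W' = (W - 1) × S - 2P + K + output_padding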
一般来说,我们希望得到 输出特征图大小/输入特征图大小 = stride,代入上面的式子能够得到结果:
padding = (kernel_size - stride + output_padding )/2
所以若取 padding = (kernel_size - 1)/2,则 output_padding 应该取值为 stride - 1,这样就能够满足 输出特征图大小/输入特征图大小 = stride
二话不说,上代码:
# Momentum argument used for the nn.BatchNorm2d layers
BN_MOMENTUM = 0.1
# Download URLs of the pretrained ResNet checkpoints, keyed by 'resnet<depth>'
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
# 3x3 convolution helper with one pixel of zero padding
def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 ``nn.Conv2d`` with ``padding=1``.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Convolution stride (defaults to 1).

    Returns:
        The newly constructed ``nn.Conv2d`` module.
    """
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)
# Residual block made of two stacked 3x3 convolutions
class BasicBlock(nn.Module):
    """Two-convolution residual block: conv-BN-ReLU, conv-BN, add shortcut, ReLU.

    Args:
        inplanes: Number of input channels.
        planes: Number of channels produced by both convolutions.
        stride: Stride of the first convolution (the second always uses 1).
        downsample: Optional module applied to the input to build the
            shortcut branch; when ``None`` the input itself is the shortcut.
    """

    # Output channels of the block are ``planes * expansion``.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        # First 3x3 conv carries the stride, second one keeps the resolution.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        # Shortcut projection, if any
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the block on ``x`` and return the activated residual sum."""
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        # Project the input only when a downsample module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)
        y += shortcut
        return self.relu(y)
# Standard bottleneck residual block (1x1 -> 3x3 -> 1x1)
class Bottleneck(nn.Module):
    """Three-convolution bottleneck block with a residual shortcut.

    Args:
        inplanes: Number of input channels.
        planes: Channel width of the 1x1 reduction and the 3x3 convolution.
        stride: Stride of the 3x3 convolution.
        downsample: Optional module applied to the input to build the
            shortcut branch; when ``None`` the input itself is the shortcut.
    """

    # The final 1x1 conv outputs ``planes * expansion`` channels
    # (e.g. planes=64 gives 256 output channels).
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        expanded = planes * self.expansion
        # 1x1 conv down to ``planes`` channels
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        # 3x3 conv; the only layer that applies ``stride``
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        # 1x1 conv up to ``planes * expansion`` channels
        self.conv3 = nn.Conv2d(planes, expanded, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(expanded, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the block on ``x`` and return the activated residual sum."""
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))
        # Project the input only when a downsample module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)
        y += shortcut
        return self.relu(y)
# ResNet backbone + transposed-convolution upsampling + one output branch per head
class PoseResNet(nn.Module):
    """ResNet feature extractor followed by three deconvolution stages and heads.

    Args:
        block: Residual block class exposing an ``expansion`` attribute
            (``BasicBlock`` or ``Bottleneck``).
        layers: Number of residual blocks in each of the four stages.
        heads: Mapping from head name to its number of output channels.
        head_conv: Channel width of the intermediate 3x3 convolution of each
            head; when it is not greater than 0 each head is a single 1x1
            convolution.
        **kwargs: Accepted for call compatibility and ignored.
    """

    def __init__(self, block, layers, heads, head_conv, **kwargs):
        super(PoseResNet, self).__init__()
        # Running channel count; updated by _make_layer / _make_deconv_layer.
        self.inplanes = 64
        # Whether the transposed convolutions carry a bias term.
        self.deconv_with_bias = False
        self.heads = heads

        # Stem: stride-2 7x7 conv followed by stride-2 max-pool (4x downsampling).
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages; stages 2-4 each halve the resolution.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # Three stride-2 transposed convolutions (256 channels, kernel 4).
        self.deconv_layers = self._make_deconv_layer(
            3,
            [256, 256, 256],
            [4, 4, 4],
        )

        # One output branch per head, registered as an attribute named after
        # the head. The 256 input channels match the last deconv stage.
        for head in sorted(self.heads):
            num_output = self.heads[head]
            if head_conv > 0:
                fc = nn.Sequential(
                    nn.Conv2d(256, head_conv,
                              kernel_size=3, padding=1, bias=True),
                    nn.ReLU(inplace=True),
                    nn.Conv2d(head_conv, num_output,
                              kernel_size=1, stride=1, padding=0))
            else:
                fc = nn.Conv2d(
                    in_channels=256,
                    out_channels=num_output,
                    kernel_size=1,
                    stride=1,
                    padding=0
                )
            setattr(self, head, fc)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one residual stage of ``blocks`` blocks and update ``self.inplanes``.

        Only the first block receives ``stride`` and the optional shortcut
        projection; the remaining blocks keep resolution and channel count.
        """
        out_planes = planes * block.expansion
        downsample = None
        # A 1x1 conv + BN projection is needed on the shortcut whenever the
        # first block changes the resolution or the channel count.
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes, momentum=BN_MOMENTUM),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_planes
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def _get_deconv_cfg(self, deconv_kernel, index):
        """Return ``(kernel, padding, output_padding)`` for a deconv kernel size.

        Args:
            deconv_kernel: Kernel size; 2, 3 and 4 are supported.
            index: Unused; kept so existing callers keep working.

        Raises:
            ValueError: If ``deconv_kernel`` is not a supported size.
        """
        # kernel -> (padding, output_padding). With the stride of 2 used in
        # _make_deconv_layer, each combination exactly doubles the input size.
        supported = {4: (1, 0), 3: (1, 1), 2: (0, 0)}
        if deconv_kernel not in supported:
            raise ValueError(
                'unsupported deconv kernel size {}; expected one of {}'.format(
                    deconv_kernel, sorted(supported)))
        padding, output_padding = supported[deconv_kernel]
        return deconv_kernel, padding, output_padding

    def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
        """Build ``num_layers`` stages of ConvTranspose2d + BatchNorm2d + ReLU.

        Args:
            num_layers: Number of upsampling stages.
            num_filters: Output channels of each stage.
            num_kernels: Transposed-convolution kernel size of each stage.

        Returns:
            An ``nn.Sequential`` holding all stages; ``self.inplanes`` is left
            at the output channel count of the last stage.
        """
        assert num_layers == len(num_filters), \
            'ERROR: num_deconv_layers is different len(num_deconv_filters)'
        assert num_layers == len(num_kernels), \
            'ERROR: num_deconv_layers is different len(num_deconv_kernels)'

        layers = []
        for i in range(num_layers):
            kernel, padding, output_padding = \
                self._get_deconv_cfg(num_kernels[i], i)
            planes = num_filters[i]
            # For the first stage self.inplanes is the layer4 output width,
            # i.e. 512 * block.expansion (512 for BasicBlock, 2048 for
            # Bottleneck); afterwards it is the previous stage's ``planes``.
            layers.append(
                nn.ConvTranspose2d(
                    in_channels=self.inplanes,
                    out_channels=planes,
                    kernel_size=kernel,
                    stride=2,
                    padding=padding,
                    output_padding=output_padding,
                    bias=self.deconv_with_bias))
            layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))
            layers.append(nn.ReLU(inplace=True))
            self.inplanes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the network and return a one-element list holding a dict of head outputs."""
        # Stem
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        # Residual stages
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        # Upsampling
        x = self.deconv_layers(x)
        # Every head reads the same upsampled feature map.
        ret = {}
        for head in self.heads:
            ret[head] = getattr(self, head)(x)
        return [ret]

    def init_weights(self, num_layers, pretrained=True):
        """Initialise the deconv stages and heads, then load pretrained ResNet weights.

        Args:
            num_layers: ResNet depth used to pick the checkpoint URL from
                ``model_urls`` (key ``'resnet<num_layers>'``).
            pretrained: Must be truthy; a falsy value is rejected.

        Raises:
            ValueError: If ``pretrained`` is falsy.
        """
        if not pretrained:
            print('=> imagenet pretrained model dose not exist')
            print('=> please download it first')
            raise ValueError('imagenet pretrained model does not exist')

        # Deconv stages: small-std normal weights for the transposed convs,
        # identity-like affine parameters for the batch norms.
        for m in self.deconv_layers.modules():
            if isinstance(m, nn.ConvTranspose2d):
                nn.init.normal_(m.weight, std=0.001)
                if self.deconv_with_bias:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Heads: only convolutions whose output-channel count equals the
        # head's channel count are re-initialised.
        for head in self.heads:
            final_layer = getattr(self, head)
            for m in final_layer.modules():
                if not isinstance(m, nn.Conv2d):
                    continue
                if m.weight.shape[0] != self.heads[head]:
                    continue
                if 'hm' in head:
                    # NOTE(review): presumably chosen so that a sigmoid applied
                    # downstream starts near 0.1 -- confirm against the loss code.
                    nn.init.constant_(m.bias, -2.19)
                else:
                    nn.init.normal_(m.weight, std=0.001)
                    nn.init.constant_(m.bias, 0)

        url = model_urls['resnet{}'.format(num_layers)]
        pretrained_state_dict = model_zoo.load_url(url)
        print('=> loading pretrained model {}'.format(url))
        # strict=False tolerates keys present on only one side (the deconv
        # stages and heads defined here have no entry under the checkpoint's
        # ResNet layer names).
        self.load_state_dict(pretrained_state_dict, strict=False)
# Supported ResNet depths: depth -> (residual block class, number of blocks in each of the four stages)
resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]),
34: (BasicBlock, [3, 4, 6, 3]),
50: (Bottleneck, [3, 4, 6, 3]),
101: (Bottleneck, [3, 4, 23, 3]),
152: (Bottleneck, [3, 8, 36, 3])}
# Factory for the ResNet-based network
def get_pose_net(num_layers, heads, head_conv):
    """Create a ``PoseResNet`` of the requested depth and initialise its weights.

    Args:
        num_layers: ResNet depth; must be a key of ``resnet_spec``.
        heads: Mapping from head name to its number of output channels.
        head_conv: Intermediate channel width forwarded to ``PoseResNet``.

    Returns:
        The constructed model after ``init_weights`` has been called with
        ``pretrained=True``.
    """
    # Look up the block type and per-stage depths for this ResNet variant.
    residual_block, stage_depths = resnet_spec[num_layers]
    net = PoseResNet(residual_block, stage_depths, heads, head_conv=head_conv)
    net.init_weights(num_layers, pretrained=True)
    return net
以上就是CenterNet源码中 backbone 为 resnet 的代码说明。其中不太好理解的部分其实是反卷积的操作,最好把网络模型整体打印出来,就能知道各个模块到底在做什么!如有问题,欢迎指正!