CenterNet Network Model Code 1: msra_resnet.py

This post starts reading the network code under models/networks in the CenterNet source, beginning with the simplest backbone, ResNet. First, the formulas relating input and output sizes for convolution and for transposed convolution (deconvolution):

1. 2D convolution (Conv2d):
nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)
Parameters:
in_channels: number of input channels, e.g. 3 for an RGB image
out_channels: number of output channels, chosen per model
kernel_size: kernel size, int or tuple; kernel_size=2 means a (2, 2) kernel, kernel_size=(2, 3) means a (2, 3), i.e. non-square, kernel
stride: step size, default 1; like kernel_size, stride=2 means a stride of 2 in both directions, stride=(2, 3) means 2 vertically and 3 horizontally
padding: zero padding added to each side of the input
dilation: spacing between kernel elements (dilated/atrous convolution)
groups: grouped convolution; the number of input channels must be divisible by groups

Assuming an input of size (H, W), kernel size K, stride S, and padding P, the output size after the convolution is (with integer, i.e. floored, division):
1) H' = (H - K + 2P)/S + 1
2) W' = (W - K + 2P)/S + 1
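
A quick sanity check of the formula (a minimal sketch; the sizes below are arbitrary):

import torch
import torch.nn as nn

# 3x3 conv, stride 2, padding 1 on a 224x224 input:
# H' = (224 - 3 + 2*1)/2 + 1 = 112
conv = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1)
x = torch.randn(1, 3, 224, 224)
print(conv(x).shape)  # torch.Size([1, 64, 112, 112])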
2. Transposed convolution (ConvTranspose2d):
nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1)
Parameters:
in_channels (int): number of input channels
out_channels (int): number of output channels
kernel_size (int or tuple): kernel size
stride (int or tuple, optional): stride, i.e. the factor by which the input is to be enlarged
padding (int or tuple, optional): implicit zero padding; reduces the output height and width by 2*padding
output_padding (int or tuple, optional): extra zeros added to one side of the output, increasing its height and width by output_padding
groups (int, optional): number of blocked connections from input channels to output channels
bias (bool, optional): if bias=True, adds a learnable bias
dilation (int or tuple, optional): spacing between kernel elements
Assuming an input of size (H, W), kernel size K, stride S, and padding P, the output size of the transposed convolution (with dilation=1) is:
1) H' = (H - 1)*S - 2P + K + output_padding
2) W' = (W - 1)*S - 2P + K + output_padding
In general we want output size / input size = stride; substituting H' = S*H into the formula above gives:
padding = (kernel_size - stride + output_padding)/2
So for padding = (kernel_size - 1)/2 to hold, output_padding should be stride - 1, which makes the output exactly stride times the input.
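
The deconv stages below use kernel_size=4, stride=2, padding=1, output_padding=0, which plugs into the formula as H' = (H - 1)*2 - 2*1 + 4 + 0 = 2H, i.e. exact 2x upsampling. A quick check (minimal sketch, arbitrary sizes):

import torch
import torch.nn as nn

# kernel 4, stride 2, padding 1, output_padding 0 -> exact 2x upsampling
deconv = nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2,
                            padding=1, output_padding=0)
x = torch.randn(1, 512, 16, 16)
print(deconv(x).shape)  # torch.Size([1, 256, 32, 32])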

Without further ado, the code:
import torch.nn as nn
import torch.utils.model_zoo as model_zoo

# momentum for the BatchNorm2d layers
BN_MOMENTUM = 0.1
# pretrained model URLs
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
# 3x3 convolution module with padding
def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)

# basic residual block: two stacked 3x3 convolutions
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        # two 3x3 conv modules
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        # optional downsample module for the shortcut branch
        self.downsample = downsample
        self.stride = stride
    # forward pass
    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        # downsample the identity when needed so the shapes match
        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

# standard bottleneck block
class Bottleneck(nn.Module):
    expansion = 4  # channel expansion ratio, typically 4: e.g. 64 channels in, 256 out
    # initialize the submodules
    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
                               bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion,
                                  momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

# build the backbone model
class PoseResNet(nn.Module):

    def __init__(self, block, layers, heads, head_conv, **kwargs):
        self.inplanes = 64  # input channels of the first residual stage
        self.deconv_with_bias = False  # the deconv layers carry no bias
        self.heads = heads  # output heads: dict mapping head name -> output channels
        super(PoseResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)  # stem: 7x7 conv, stride 2
        self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)  # BN
        self.relu = nn.ReLU(inplace=True)  # ReLU
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # max pool; the stem downsamples 4x in total
        # residual stages of the backbone
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # used for deconv layers
        self.deconv_layers = self._make_deconv_layer(
            3,
            [256, 256, 256],
            [4, 4, 4],
        )
        # self.final_layer = []

        for head in sorted(self.heads):
          num_output = self.heads[head]
          if head_conv > 0:
            fc = nn.Sequential(
                nn.Conv2d(256, head_conv,
                  kernel_size=3, padding=1, bias=True),
                nn.ReLU(inplace=True),
                nn.Conv2d(head_conv, num_output, 
                  kernel_size=1, stride=1, padding=0))
          else:
            fc = nn.Conv2d(
              in_channels=256,
              out_channels=num_output,
              kernel_size=1,
              stride=1,
              padding=0
          )
          self.__setattr__(head, fc)

        # self.final_layer = nn.ModuleList(self.final_layer)
    # build one residual stage
    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None  # downsample module for the shortcut
        # a downsample is needed when stride != 1 or the channel counts differ
        if stride != 1 or self.inplanes != planes * block.expansion:
            # the downsample module is a 1x1 conv followed by BN
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
            )

        layers = []  # list of blocks in this stage
        layers.append(block(self.inplanes, planes, stride, downsample))  # first residual block (may downsample)
        self.inplanes = planes * block.expansion  # output channels of this stage
        for i in range(1, blocks):  # stack the remaining residual blocks
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)  # assemble into a sequential module

    def _get_deconv_cfg(self, deconv_kernel, index):
        # each config yields exact 2x upsampling at stride 2:
        # (H - 1)*2 - 2*padding + kernel + output_padding = 2H
        if deconv_kernel == 4:
            padding = 1
            output_padding = 0
        elif deconv_kernel == 3:
            padding = 1
            output_padding = 1
        elif deconv_kernel == 2:
            padding = 0
            output_padding = 0

        return deconv_kernel, padding, output_padding
    # build the transposed-convolution (deconv) upsampling layers
    def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
        # sanity-check the arguments
        assert num_layers == len(num_filters), \
            'ERROR: num_deconv_layers is different from len(num_deconv_filters)'
        assert num_layers == len(num_kernels), \
            'ERROR: num_deconv_layers is different from len(num_deconv_kernels)'
        # list of layers to stack; num_layers = 3 in this model
        layers = []
        for i in range(num_layers):
            # get the deconv config for this layer:
            # kernel=4, padding=1, output_padding=0
            kernel, padding, output_padding = \
                self._get_deconv_cfg(num_kernels[i], i)
            # number of output channels, 256 here
            planes = num_filters[i]
            # append the transposed conv; self.inplanes starts from the
            # stage-5 output of the backbone (512 for BasicBlock ResNets,
            # 2048 for Bottleneck ResNets)
            layers.append(
                nn.ConvTranspose2d(
                    in_channels=self.inplanes,
                    out_channels=planes,
                    kernel_size=kernel,
                    stride=2,
                    padding=padding,
                    output_padding=output_padding,
                    bias=self.deconv_with_bias))
            # append BN
            layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))
            # append ReLU
            layers.append(nn.ReLU(inplace=True))
            self.inplanes = planes  # update inplanes for the next layer

        return nn.Sequential(*layers)  # assemble into a sequential module
    # forward pass
    def forward(self, x):
        # stem
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        # the four residual stages
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        # deconv upsampling
        x = self.deconv_layers(x)
        ret = {}
        # run each output head and collect the results in a dict
        for head in self.heads:
            ret[head] = self.__getattr__(head)(x)
        return [ret]
    # initialize the weights
    def init_weights(self, num_layers, pretrained=True):
        # load pretrained weights by default
        if pretrained:
            # print('=> init resnet deconv weights from normal distribution')
            for _, m in self.deconv_layers.named_modules():
                # initialize the deconv modules
                if isinstance(m, nn.ConvTranspose2d):
                    # print('=> init {}.weight as normal(0, 0.001)'.format(name))
                    # print('=> init {}.bias as 0'.format(name))
                    nn.init.normal_(m.weight, std=0.001)
                    if self.deconv_with_bias:
                        nn.init.constant_(m.bias, 0)
                # initialize the BN parameters
                elif isinstance(m, nn.BatchNorm2d):
                    # print('=> init {}.weight as 1'.format(name))
                    # print('=> init {}.bias as 0'.format(name))
                    nn.init.constant_(m.weight, 1)
                    nn.init.constant_(m.bias, 0)
            # print('=> init final conv weights from normal distribution')
            # initialize the output heads
            for head in self.heads:
              # get the module for this head
              final_layer = self.__getattr__(head)
              # iterate over the head's submodules
              for i, m in enumerate(final_layer.modules()):
                  # initialize the conv layers
                  if isinstance(m, nn.Conv2d):
                      # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                      # print('=> init {}.weight as normal(0, 0.001)'.format(name))
                      # print('=> init {}.bias as 0'.format(name))
                      if m.weight.shape[0] == self.heads[head]:
                          if 'hm' in head:
                              # heatmap head: sigmoid(-2.19) ~ 0.1, a focal-loss-style prior
                              nn.init.constant_(m.bias, -2.19)
                          else:
                              nn.init.normal_(m.weight, std=0.001)
                              nn.init.constant_(m.bias, 0)
            #pretrained_state_dict = torch.load(pretrained)
            # get the URL of the pretrained model
            url = model_urls['resnet{}'.format(num_layers)]
            pretrained_state_dict = model_zoo.load_url(url)  # fetch the pretrained weights
            print('=> loading pretrained model {}'.format(url))
            # load the weights; strict=False because the deconv and head
            # parameters are not present in the ImageNet checkpoint
            self.load_state_dict(pretrained_state_dict, strict=False)
        else:
            print('=> imagenet pretrained model does not exist')
            print('=> please download it first')
            raise ValueError('imagenet pretrained model does not exist')

# block type and layer counts for each ResNet depth
resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]),
               34: (BasicBlock, [3, 4, 6, 3]),
               50: (Bottleneck, [3, 4, 6, 3]),
               101: (Bottleneck, [3, 4, 23, 3]),
               152: (Bottleneck, [3, 8, 36, 3])}

# build the full network
def get_pose_net(num_layers, heads, head_conv):
  block_class, layers = resnet_spec[num_layers]  # pick block type and layer counts for this depth
  # build the backbone
  model = PoseResNet(block_class, layers, heads, head_conv=head_conv)
  model.init_weights(num_layers, pretrained=True)  # initialize / load pretrained weights
  return model

That covers the CenterNet backbone code for ResNet. The part that is hardest to follow is the transposed-convolution (deconv) stage; the best way to understand it is to print the whole model and see what each module actually does. Corrections welcome!
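
For example, a minimal sketch (the head configuration below, hm/wh/reg with 80 classes and head_conv=64, follows CenterNet's COCO defaults and is an assumption here; note that get_pose_net downloads the ImageNet weights on first use):

import torch

# hypothetical head config: 80-class heatmap, box size, center offset
heads = {'hm': 80, 'wh': 2, 'reg': 2}
model = get_pose_net(num_layers=18, heads=heads, head_conv=64)
print(model)  # print the full module tree to see every stage

x = torch.randn(1, 3, 512, 512)  # stem + residual stages downsample 32x -> 16x16
out = model(x)[0]                # forward returns a list containing one dict
for name, tensor in out.items():
    print(name, tensor.shape)    # the three deconvs upsample 8x -> 128x128
# hm  torch.Size([1, 80, 128, 128])
# wh  torch.Size([1, 2, 128, 128])
# reg torch.Size([1, 2, 128, 128])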
