CSPDarknet53 in PyTorch

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch import Tensor
import math
from torchsummary import summary


BN_MOMENTUM = 0.1


# ResNet wraps nn.Conv2d into a 3x3 convolution template. Personally I think this is unnecessary and makes the code messier, but once you're used to the style it reads fine
def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False, padding_mode='zeros')


# Wrapper around nn.Conv2d defining a 1x1 convolution template
def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


# Define the Mish activation function
class Mish(nn.Module):
    def __init__(self):
        super(Mish, self).__init__()

    def forward(self, x: Tensor) -> Tensor:
        return x * torch.tanh(F.softplus(x))  # Mish(x) = x * tanh(ln(1 + e^x)); see https://zhuanlan.zhihu.com/p/263555912


# First define the basic "conv block": Conv + BatchNormalization + Mish,
# since a Darknet block is conv + conv + residual
class ConvBNMish(nn.Module):
    expansion = 1

    def __init__(self, in_channels, out_channels, kernel_size, stride=1):
        super(ConvBNMish, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size // 2, bias=False)
        self.bn = nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM)
        self.activation = Mish()

    def forward(self, x):
        out = self.conv(x)
        out = self.bn(out)
        # The activation is placed after BN here; you could also try placing it before BN
        out = self.activation(out)

        return out


# Building block of CSPDarknet:
# the residual blocks stacked inside each stage
class ResBlock(nn.Module):
    def __init__(self, in_channels, hidden_channels=None):
        super(ResBlock, self).__init__()

        # hidden_channels is the hidden width in the middle of the residual block: in_channels -> hidden_channels -> in_channels
        if hidden_channels is None:
            hidden_channels = in_channels

        # BasicBlock
        # each block contains two "conv blocks"
        self.block = nn.Sequential(ConvBNMish(in_channels, hidden_channels, kernel_size=1),
                                   ConvBNMish(hidden_channels, in_channels, kernel_size=3))

    # Residual function
    def forward(self, x):
        return x + self.block(x)  # the residual connection H(x) = F(x) + x


# Structural block (stage) of CSPDarknet:
# there is a large residual (shortcut) edge
# that bypasses the whole stack of residual blocks.
# The first CSP stage differs from the later ones (its split branches keep the full channel width);
# the stage depths [1, 2, 8, 8, 4] follow Darknet-53 from the YOLOv3 paper
class ResBlockBody(nn.Module):
    def __init__(self, in_channels, out_channels, num_block, first):
        super(ResBlockBody, self).__init__()
        # Downsampling
        self.downsample = ConvBNMish(in_channels, out_channels, kernel_size=3, stride=2)  # 3x3 conv, stride 2

        if first:
            # In the first stage the split branches keep the full channel width
            self.split_conv0 = ConvBNMish(out_channels, out_channels, kernel_size=1)  # CSPNet part 1
            self.split_conv1 = ConvBNMish(out_channels, out_channels, kernel_size=1)  # CSPNet part 2
            self.blocks_conv = nn.Sequential(  # CSPNet part 2
                ResBlock(in_channels=out_channels, hidden_channels=out_channels // 2),
                ConvBNMish(out_channels, out_channels, kernel_size=1)
            )
            self.concat_conv = ConvBNMish(out_channels * 2, out_channels, 1)  # CSPNet concatenation

        else:
            # From the second stage on, each split branch halves the channel width
            self.split_conv0 = ConvBNMish(out_channels, out_channels // 2, 1)
            self.split_conv1 = ConvBNMish(out_channels, out_channels // 2, 1)
            self.blocks_conv = nn.Sequential(
                *[ResBlock(out_channels // 2) for _ in range(num_block)],  # stack of num_block ResBlocks
                ConvBNMish(out_channels // 2, out_channels // 2, kernel_size=1)
            )
            self.concat_conv = ConvBNMish(out_channels, out_channels, 1)

    def forward(self, x):
        # downsample first
        x = self.downsample(x)
        # CSP part 1
        x0 = self.split_conv0(x)
        # CSP part 2
        x1 = self.split_conv1(x)
        x1 = self.blocks_conv(x1)
        # CSP channel concatenation
        out = torch.cat([x0, x1], dim=1)
        # final 1x1 conv after the concat
        out = self.concat_conv(out)

        return out


# The complete CSPDarknet53 network
class CSPDarknet53(nn.Module):
    def __init__(self, layer_num, num_classes):  # layer_num: [1, 2, 8, 8, 4], the number of ResBlocks per stage
        super(CSPDarknet53, self).__init__()
        # First define the input width of the first residual stage (the stem output)
        self.in_channels = 32

        # The input has 3 channels (an RGB image); self.in_channels is the input width of the residual stages
        self.conv1 = ConvBNMish(3, self.in_channels, kernel_size=3, stride=1)

        # Output width of each residual stage
        filters = [64, 128, 256, 512, 1024]

        # Build the stage bodies; depths given by layer_num
        self.stages = nn.ModuleList([
            ResBlockBody(self.in_channels, filters[0], layer_num[0], first=True),
            ResBlockBody(filters[0], filters[1], layer_num[1], first=False),
            ResBlockBody(filters[1], filters[2], layer_num[2], first=False),
            ResBlockBody(filters[2], filters[3], layer_num[3], first=False),
            ResBlockBody(filters[3], filters[4], layer_num[4], first=False)
        ])

        self.global_pooling = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(filters[4], num_classes)

        # Weight initialization; there are several options:
        # 1. iterate over the layers with net.modules() and dispatch on each layer's type
        # 2. initialize each layer m's weight.data tensor in place
        # Kaiming/He normal initialization, as used in the original DenseNet code
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()

    def forward(self, x):
        out = self.conv1(x)

        out = self.stages[0](out)
        out = self.stages[1](out)
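        # out3/out4/out5 are the multi-scale feature maps a YOLO-style detection neck would tap;
        # for classification here, only out5 flows onward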
        out3 = self.stages[2](out)
        out4 = self.stages[3](out3)
        out5 = self.stages[4](out4)
        out = self.global_pooling(out5)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return F.softmax(out, dim=1)  # note: if training with nn.CrossEntropyLoss, return the raw logits instead


def darknet_53(num_classes=2):
    return CSPDarknet53([1, 2, 8, 8, 4], num_classes)


net = darknet_53()
summary(net, (3, 256, 256))
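
A quick sanity check (my own addition, with an assumed 256x256 input and the default num_classes=2): a single forward pass should produce one softmax row per image.

x = torch.randn(2, 3, 256, 256)
out = net(x)
print(out.shape)       # expected: torch.Size([2, 2])
print(out.sum(dim=1))  # each softmax row sums to ~1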

Notes: writing down what I've learned over the past few days.

  • 1
self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size // 2, bias=False)

Here padding = kernel_size // 2, so the padding adapts to the kernel size; for an odd kernel at stride 1 this gives 'same' padding.

In PyTorch, padding defaults to 0, which corresponds to TensorFlow's 'valid' mode (no zero padding); without it the output would be smaller than the input.

There is also a dilation parameter that turns the layer into a dilated (atrous) convolution. It defaults to 1, i.e. an ordinary convolution, and is not used here.
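
A minimal shape check of this padding rule (my own sketch, with arbitrary sizes): with kernel_size=3 and padding=1, stride 1 preserves the spatial size and stride 2 halves it.

x = torch.randn(1, 32, 64, 64)
conv_s1 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=3 // 2)
conv_s2 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=3 // 2)
print(conv_s1(x).shape)  # torch.Size([1, 64, 64, 64]) -> same spatial size
print(conv_s2(x).shape)  # torch.Size([1, 64, 32, 32]) -> halved by the stride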

  • 2
self.bn = nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM)

num_features is the number of channels C of an (N, C, H, W) input, i.e. the number of output channels of the preceding convolution. BN keeps one learnable scale/shift pair and one set of running statistics per channel, and each channel's statistics are computed over the batch_size * width * height elements of that channel.

If not set, momentum defaults to 0.1.
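
A small sketch of this per-channel behaviour (my own example):

bn = nn.BatchNorm2d(64, momentum=0.1)
print(bn.weight.shape, bn.bias.shape)  # torch.Size([64]) each: one scale/shift per channel
print(bn.running_mean.shape)           # torch.Size([64])
# running_mean <- (1 - momentum) * running_mean + momentum * batch_mean
y = bn(torch.randn(8, 64, 32, 32))     # per-channel stats over 8 * 32 * 32 values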

  • 3

You need to install the torchsummary package (pip install torchsummary) to use summary(), which prints per-layer output shapes and parameter counts. A very handy tool.
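
If all you need is the total parameter count, plain PyTorch works too (my own sketch):

n_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
print(f'{n_params:,} trainable parameters')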

  • 4

There is also an expansion attribute that goes unused here. The original ResNet needs it as a channel multiplier: the bottleneck block reduces the channel width on entry and has to restore it afterwards. I'll study it in detail when I actually use it. There is an excellent explanation, which says roughly:

In ResNet, the bottleneck differs from the basic block only in having three convolutions, 1x1, 3x3, and 1x1, used respectively to compress the width, do the convolution, and restore the width. inplane is the input channel count and plane the output channel count, with expansion multiplying the output width. In BasicBlock expansion is 1, so it can be ignored entirely and the output width is simply plane. Bottleneck, however, refuses to take the usual path: its job is to compress the channels and then expand them again, so plane no longer denotes the output width but the compressed width inside the block, and the output width becomes plane * expansion. After that comes the network body.
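
A minimal sketch of such a bottleneck, reusing the conv1x1/conv3x3 helpers defined at the top (my own illustration of the expansion mechanism, following torchvision's design; it is not part of CSPDarknet53):

class Bottleneck(nn.Module):
    expansion = 4  # output width = planes * expansion

    def __init__(self, inplanes, planes, stride=1):
        super().__init__()
        self.conv1 = conv1x1(inplanes, planes)                 # compress the width
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes, stride)           # 3x3 convolution
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = conv1x1(planes, planes * self.expansion)  # restore the width
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        # 1x1 projection on the shortcut when the shapes differ
        self.shortcut = None
        if stride != 1 or inplanes != planes * self.expansion:
            self.shortcut = nn.Sequential(
                conv1x1(inplanes, planes * self.expansion, stride),
                nn.BatchNorm2d(planes * self.expansion))

    def forward(self, x):
        identity = x if self.shortcut is None else self.shortcut(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        return self.relu(out + identity)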
