MobileNet-yolov4

最新推荐文章于 2024-08-08 21:44:51 发布

有温度的AI

最新推荐文章于 2024-08-08 21:44:51 发布

阅读量1.5k

点赞数 1

文章标签：人工智能深度学习 pytorch python cnn

本文链接：https://blog.csdn.net/m0_56247038/article/details/125619631

版权

一、MobileNetV1

1、定义conv_bn结构

def conv_bn(inp, oup, stride = 1):
    """Build a 3x3 convolution -> batch norm -> ReLU6 stack.

    Args:
        inp: number of input channels.
        oup: number of output channels.
        stride: stride of the 3x3 convolution (default 1).

    Returns:
        An ``nn.Sequential`` holding the three layers in that order.
    """
    conv = nn.Conv2d(
        in_channels=inp,
        out_channels=oup,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,  # no conv bias: a BatchNorm2d layer follows directly
    )
    norm = nn.BatchNorm2d(oup)
    activation = nn.ReLU6(inplace=True)
    return nn.Sequential(conv, norm, activation)

2、定义conv_dw结构

def conv_dw(inp, oup, stride = 1):
    """Build a depthwise-separable convolution block.

    The block is a 3x3 depthwise convolution (one group per input channel)
    followed by a 1x1 pointwise convolution; each convolution is followed by
    batch normalization and ReLU6.

    Args:
        inp: number of input channels.
        oup: number of output channels produced by the 1x1 convolution.
        stride: stride of the depthwise 3x3 convolution (default 1).

    Returns:
        An ``nn.Sequential`` of the six layers in order.
    """
    # Part 1: depthwise 3x3 convolution; output channels equal input channels.
    depthwise = [
        nn.Conv2d(inp, inp, kernel_size=3, stride=stride, padding=1,
                  groups=inp, bias=False),
        nn.BatchNorm2d(inp),
        nn.ReLU6(inplace=True),
    ]
    # Part 2: ordinary 1x1 convolution that adjusts the channel count.
    pointwise = [
        nn.Conv2d(inp, oup, kernel_size=1, stride=1, padding=0, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True),
    ]
    return nn.Sequential(*depthwise, *pointwise)

3、MobileNetV1主干

# Shapes below are written as height,width,channels for a 416x416x3 input.
# stage1 ends at 52,52,256 and is used as one prediction feature layer.
# stage2 ends at 26,26,512 and is used as one prediction feature layer.
# stage3 ends at 13,13,1024 and is used as one prediction feature layer.
class MobileNetV1(nn.Module):
    """MobileNetV1 backbone organised as three sequential stages.

    ``forward`` chains the three stages and returns only the final feature
    map; the stages are exposed as attributes (``stage1``, ``stage2``,
    ``stage3``) so intermediate outputs can be taken from outside.
    """

    def __init__(self):
        super().__init__()
        self.stage1 = nn.Sequential(
            conv_bn(3, 32, stride=2),      # 416,416,3   -> 208,208,32
            conv_dw(32, 64, stride=1),     # 208,208,32  -> 208,208,64
            conv_dw(64, 128, stride=2),    # 208,208,64  -> 104,104,128
            conv_dw(128, 128, stride=1),   # 104,104,128 -> 104,104,128
            conv_dw(128, 256, stride=2),   # 104,104,128 -> 52,52,256
            conv_dw(256, 256, stride=1),   # 52,52,256   -> 52,52,256
        )
        # 52,52,256 -> 26,26,512: one strided block, then five stride-1
        # blocks that keep the shape at 26,26,512.
        self.stage2 = nn.Sequential(
            conv_dw(256, 512, stride=2),
            *(conv_dw(512, 512, stride=1) for _ in range(5)),
        )
        # 26,26,512 -> 13,13,1024
        self.stage3 = nn.Sequential(
            conv_dw(512, 1024, stride=2),   # 26,26,512  -> 13,13,1024
            conv_dw(1024, 1024, stride=1),  # 13,13,1024 -> 13,13,1024
        )

    def forward(self, x):
        """Run ``x`` through stage1, stage2 and stage3 and return the result."""
        for stage in (self.stage1, self.stage2, self.stage3):
            x = stage(x)
        return x

def mobilenet_v1(pretrained=False, progress=True):
    """Create a MobileNetV1 backbone, optionally loading local weights.

    Args:
        pretrained: when true, load weights from
            ``./model_data/mobilenet_v1_weights.pth`` (relative to the current
            working directory) with strict key matching.
        progress: accepted for signature parity with ``mobilenet_v2``; unused
            because the weights are read from a local file.

    Returns:
        The constructed ``MobileNetV1`` model.
    """
    model = MobileNetV1()
    if pretrained:
        # map_location='cpu' lets a checkpoint that was saved from a GPU be
        # read on a machine without CUDA. load_state_dict copies the values
        # into the model's own tensors, so the model's device is unaffected.
        # NOTE(review): torch.load unpickles the file; only load trusted
        # checkpoints.
        state_dict = torch.load('./model_data/mobilenet_v1_weights.pth',
                                map_location='cpu')
        model.load_state_dict(state_dict, strict=True)
    return model

只需用MobileNetV1结构替换原YOLOv4中的CSPDarknet53主干网络即可，后面的Neck层和Head层的整体结构保持不变。此外还要定义一个变量in_filters，表示三个预测特征层的输出通道数；不同主干网络的这三个通道数并不相同，对MobileNetV1而言即上文的256、512、1024。还需要注意的是，为了减少参数量，我们把Neck层和Head层中所有的3*3卷积都换成了可分离卷积：make_three_conv中一处，make_five_conv中两处，yolo_head中一处，down_sample1和down_sample2中各一处，共六处。

class MobileNetV1(nn.Module):
    """YOLO-side wrapper that exposes three MobileNetV1 feature maps.

    NOTE(review): the backbone class instantiated by ``mobilenet_v1()`` has
    the same name as this wrapper. Presumably the two live in different
    modules; confirm, because within a single module this definition would
    shadow the backbone class.

    Args:
        pretrained: forwarded to ``mobilenet_v1``.
    """

    def __init__(self, pretrained = False):
        super().__init__()
        self.model = mobilenet_v1(pretrained=pretrained)

    def forward(self, x):
        """Return the outputs of stage1, stage2 and stage3 as a tuple.

        Per the original shape notes (416x416 input) these are
        52,52,256 / 26,26,512 / 13,13,1024.
        """
        backbone = self.model
        shallow = backbone.stage1(x)       # 52,52,256
        middle = backbone.stage2(shallow)  # 26,26,512
        deep = backbone.stage3(middle)     # 13,13,1024
        return shallow, middle, deep

可参考：YOLOV4网络结构搭建_m0_56247038的博客-CSDN博客

二、MobileNetV2

1、定义一个基本的卷积块ConvBNReLU

class ConvBNReLU(nn.Sequential):
    """Convolution -> BatchNorm2d -> ReLU6, packaged as an ``nn.Sequential``.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        kernel_size: convolution kernel size (default 3).
        stride: convolution stride (default 1).
        groups: convolution groups; 1 is an ordinary convolution, and
            ``groups == in_planes`` gives a depthwise convolution.
    """

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        # kernel_size=3 -> padding=1; kernel_size=1 -> padding=0.
        pad = (kernel_size - 1) // 2
        conv = nn.Conv2d(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=pad,
            groups=groups,
            bias=False,
        )
        norm = nn.BatchNorm2d(out_planes)
        activation = nn.ReLU6(inplace=True)
        super().__init__(conv, norm, activation)

2、定义倒残差结构InvertedResidual

class InvertedResidual(nn.Module):
    """MobileNetV2 inverted-residual block.

    Layout: optional 1x1 expansion (ConvBNReLU) -> 3x3 depthwise ConvBNReLU
    -> 1x1 projection convolution with batch norm and no activation.

    Args:
        inp: number of input channels.
        oup: number of output channels.
        stride: stride of the depthwise convolution.
        expand_ratio: expansion factor ``t``; the hidden width is
            ``inp * expand_ratio``.
    """

    def __init__(self, inp, oup, stride, expand_ratio):
        super().__init__()
        self.stride = stride

        # Number of kernels in the first 1x1 convolution of the block.
        hidden_dim = inp * expand_ratio
        # The shortcut is used only when stride is 1 and the input and
        # output have the same channel count.
        self.use_res_connect = stride == 1 and inp == oup

        # With expand_ratio == 1 the expansion convolution would keep the
        # channel count unchanged, so it is omitted. Otherwise it raises the
        # channel count to hidden_dim.
        if expand_ratio == 1:
            expansion = []
        else:
            expansion = [ConvBNReLU(inp, hidden_dim, kernel_size=1)]

        # groups=hidden_dim makes this a depthwise convolution, whose output
        # channel count equals its input channel count.
        depthwise = ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim)
        # The projection uses a linear activation (y = x), so it is built
        # from a bare Conv2d + BatchNorm2d rather than ConvBNReLU.
        project_conv = nn.Conv2d(hidden_dim, oup, kernel_size=1, stride=1, padding=0, bias=False)
        project_norm = nn.BatchNorm2d(oup)

        self.conv = nn.Sequential(*expansion, depthwise, project_conv, project_norm)

    def forward(self, x):
        """Apply the block, adding the input back when the shortcut is enabled."""
        out = self.conv(x)
        if not self.use_res_connect:
            return out
        return x + out

3、 MobileNetV2主干

class MobileNetV2(nn.Module):
    """MobileNetV2 feature extractor built from inverted-residual blocks.

    Only the convolutional ``features`` stack is created; no pooling or
    classifier layers are built, so ``forward`` returns a feature map rather
    than class scores.

    Args:
        num_classes: accepted for signature compatibility; unused because no
            classifier is constructed here.
        alpha: width multiplier applied to the channel count of every layer.
        round_nearest: passed to ``_make_divisible`` as the rounding divisor
            (presumably channel counts are rounded to a multiple of this
            value; the helper is defined outside this block).
    """

    def __init__(self, num_classes=1000, alpha=1.0, round_nearest=8):
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        # Scale the stem and final 1x1 convolution with alpha as well, so the
        # multiplier applies to the whole network and not only to the
        # bottlenecks. The final width is never shrunk below 1280, following
        # the torchvision reference implementation. With alpha == 1.0 these
        # stay at 32 and 1280.
        input_channel = _make_divisible(32 * alpha, round_nearest)
        last_channel = _make_divisible(1280 * max(1.0, alpha), round_nearest)

        # Inverted-residual settings.
        # t: expansion factor of the 1x1 convolution, relative to the block's
        #    input channels
        # c: output channels of the block
        # n: number of times the bottleneck is repeated
        # s: stride of the depthwise convolution in the first bottleneck of
        #    the group; the repeated bottlenecks always use stride 1
        # Shape notes assume a 416x416 input and alpha == 1.0.
        inverted_residual_setting = [
            # t, c, n, s
            # 208,208,32 -> 208,208,16
            [1, 16, 1, 1],
            # 208,208,16 -> 104,104,24
            [6, 24, 2, 2],
            # 104,104,24 -> 52,52,32 (used as a prediction feature layer)
            [6, 32, 3, 2],
            # 52,52,32 -> 26,26,64
            [6, 64, 4, 2],
            # 26,26,64 -> 26,26,96 (used as a prediction feature layer)
            [6, 96, 3, 1],
            # 26,26,96 -> 13,13,160
            [6, 160, 3, 2],
            # 13,13,160 -> 13,13,320 (used as a prediction feature layer)
            [6, 320, 1, 1],
        ]

        # Stem: RGB input -> input_channel, stride 2.
        features = [ConvBNReLU(3, input_channel, stride=2)]
        # Bottleneck groups.
        for t, c, n, s in inverted_residual_setting:
            # With alpha == 1.0: 16, 24, 32, 64, 96, 160, 320.
            output_channel = _make_divisible(c * alpha, round_nearest)
            for i in range(n):
                # Only the first bottleneck of a group uses stride s.
                stride = s if i == 0 else 1
                features.append(block(input_channel, output_channel, stride, expand_ratio=t))
                # The output width becomes the next block's input width.
                input_channel = output_channel
        # Final 1x1 convolution up to last_channel.
        features.append(ConvBNReLU(input_channel, last_channel, kernel_size=1))
        # Pack all layers into one sequential feature extractor.
        self.features = nn.Sequential(*features)

        # Weight initialization.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                # No Linear layer is built in this class; the branch only
                # matters if one is added later.
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        """Return the output of the full ``features`` stack for ``x``."""
        return self.features(x)


def mobilenet_v2(pretrained=False, progress=True):
    """Create a MobileNetV2 backbone, optionally loading downloaded weights.

    Args:
        pretrained: when true, fetch the state dict from
            ``model_urls['mobilenet_v2']`` (stored under ``model_data``) and
            load it into the model.
        progress: forwarded to ``load_state_dict_from_url``.

    Returns:
        The constructed ``MobileNetV2`` model.
    """
    model = MobileNetV2()
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'], model_dir="model_data",
                                              progress=progress)
        # This MobileNetV2 builds only the ``features`` stack. A checkpoint
        # exported from a classification model would also carry
        # ``classifier.*`` entries, which the strict load below rejects as
        # unexpected keys, so they are dropped first. The load stays strict
        # so genuinely missing or mismatched backbone weights still raise.
        state_dict = {
            key: value
            for key, value in state_dict.items()
            if not key.startswith("classifier.")
        }
        model.load_state_dict(state_dict)

    return model

只需用MobileNetV2结构替换原YOLOv4中的CSPDarknet53主干网络即可，后面的Neck层和Head层的整体结构保持不变。此外还要定义一个变量in_filters，表示三个预测特征层的输出通道数；不同主干网络的这三个通道数并不相同，对MobileNetV2而言即上文的32、96、320。还需要注意的是，为了减少参数量，我们把Neck层和Head层中所有的3*3卷积都换成了可分离卷积：make_three_conv中一处，make_five_conv中两处，yolo_head中一处，down_sample1和down_sample2中各一处，共六处。

class MobileNetV2(nn.Module):
    """YOLO-side wrapper that exposes three MobileNetV2 feature maps.

    NOTE(review): the backbone class instantiated by ``mobilenet_v2()`` has
    the same name as this wrapper. Presumably the two live in different
    modules; confirm, because within a single module this definition would
    shadow the backbone class.

    Args:
        pretrained: forwarded to ``mobilenet_v2``.
    """

    def __init__(self, pretrained = False):
        super().__init__()
        self.model = mobilenet_v2(pretrained=pretrained)

    def forward(self, x):
        """Return three intermediate feature maps as a tuple.

        The backbone's ``features`` stack is sliced by zero-based index.
        Per the original shape notes (416x416 input) the outputs are
        52,52,32 / 26,26,96 / 13,13,320.
        """
        layers = self.model.features
        shallow = layers[:7](x)         # layers 0..6   -> 52,52,32
        middle = layers[7:14](shallow)  # layers 7..13  -> 26,26,96
        deep = layers[14:18](middle)    # layers 14..17 -> 13,13,320
        return shallow, middle, deep