MobileNet V2
Innovations:
1. Improves the bottleneck from ResNet by introducing the inverted residual structure: a 1x1 conv first expands the channels, a 3x3 depthwise separable conv filters them, and a 1x1 conv projects back down (a minimal sketch of this channel flow follows this list).
2. The final 1x1 conv does not use ReLU (the linear bottleneck).
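A minimal sketch of the channel flow through one inverted residual block, with illustrative sizes (24 channels, expand ratio 6) and BatchNorm omitted for brevity; the full implementation follows:

import torch
from torch import nn
from torch.nn import functional as F

x = torch.randn(1, 24, 56, 56)                                     # (N, 24, H, W) input
expand = nn.Conv2d(24, 144, kernel_size=1)                         # 1x1 conv expands 24 -> 144 (expand_ratio=6)
dwise = nn.Conv2d(144, 144, kernel_size=3, padding=1, groups=144)  # 3x3 depthwise conv, one filter per channel
project = nn.Conv2d(144, 24, kernel_size=1)                        # 1x1 linear projection back to 24, no ReLU
out = x + project(F.relu6(dwise(F.relu6(expand(x)))))              # shortcut: stride=1 and channels match
print(out.shape)                                                   # torch.Size([1, 24, 56, 56])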
MobileNet V2 code implementation
import torch
import torch.nn as nn
def _make_divisible(in_channels, divisor=8, min_channel=None):
    # round the channel count to a multiple of divisor
    if min_channel is None:
        min_channel = divisor
    # round to the nearest multiple
    new_channel = max(min_channel, int(in_channels + divisor / 2) // divisor * divisor)
    # make sure rounding down does not reduce the channels by more than 10%
    if new_channel < 0.9 * in_channels:
        new_channel += divisor
    return new_channel
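# Illustrative values (not from the original post): _make_divisible(37) returns 40 (nearest multiple of 8),
# while _make_divisible(11) returns 16, because rounding down to 8 would lose more than 10% of the channels.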
class ConvBNRelu(nn.Sequential):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, groups=1):
        padding = (kernel_size - 1) // 2
        super(ConvBNRelu, self).__init__(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, groups=groups, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU6(inplace=True)
        )
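# Note: when groups == in_channels (and in_channels == out_channels), the Conv2d above acts as a
# depthwise convolution; InvertResidual below relies on exactly this for its 3x3 depthwise layer.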
class InvertResidual(nn.Module):
    def __init__(self, in_channels, out_channels, stride, expand_ratio):
        super(InvertResidual, self).__init__()
        hidden_channels = in_channels * expand_ratio
        self.use_shortcut = stride == 1 and in_channels == out_channels
        layers = []
        if expand_ratio != 1:
            # when expand_ratio != 1 the block starts with a 1x1 expansion conv;
            # when it equals 1 (the first bottleneck), this 1x1 conv is omitted
            layers.append(ConvBNRelu(in_channels, hidden_channels, kernel_size=1))
        # extend appends several layers at once (like repeated append)
        layers.extend([
            # 3x3 depthwise conv
            ConvBNRelu(hidden_channels, hidden_channels, kernel_size=3, stride=stride, groups=hidden_channels),
            # 1x1 conv (linear, no activation)
            nn.Conv2d(hidden_channels, out_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_channels)
        ])
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        if self.use_shortcut:
            return x + self.conv(x)
        else:
            return self.conv(x)
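# Illustrative shapes: InvertResidual(24, 24, stride=1, expand_ratio=6) maps (N, 24, H, W)
# through 24 -> 144 -> 144 -> 24 channels and adds the shortcut; with stride=2 or a channel
# change, use_shortcut is False and only self.conv(x) is returned.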
class Mobilenet_V2(nn.Module):
    # alpha: width multiplier that scales the number of channels in every conv layer
    def __init__(self, num_classes, alpha=1.0, round_nearest=8):
        super(Mobilenet_V2, self).__init__()
        block = InvertResidual
        # round the stem and last-layer channel counts to multiples of 8
        input_channels = _make_divisible(32 * alpha, round_nearest)
        last_channels = _make_divisible(1280 * alpha, round_nearest)
        invert_residual_setting = [
            # t, c, n, s
            # t: expansion factor (how much the channels are widened)
            # c: output channels of the bottleneck
            # n: how many times the bottleneck is repeated
            # s: stride of the first bottleneck in the group
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]
        features = []
        # conv1
        features.append(ConvBNRelu(in_channels=3, out_channels=input_channels, stride=2))
        # inverted residual bottlenecks
        for t, c, n, s in invert_residual_setting:
            output_channels = _make_divisible(c * alpha, round_nearest)
            for i in range(n):
                stride = s if i == 0 else 1
                features.append(block(input_channels, output_channels, stride, expand_ratio=t))
                input_channels = output_channels
        # 1x1 conv
        features.append(ConvBNRelu(input_channels, last_channels, kernel_size=1))
        # combine feature layers
        self.features = nn.Sequential(*features)
        # classifier head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # target output H, W
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(last_channels, num_classes)
        )
        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


if __name__ == '__main__':
    a = torch.randn(1, 3, 224, 224)
    net = Mobilenet_V2(num_classes=5)
    out = net(a)
    print(out.shape)
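As a quick sanity check on model size (a sketch, not part of the original post), counting the parameters of the 1000-class, alpha=1.0 configuration should land around 3.5M, in line with the roughly 3.4M reported in the MobileNetV2 paper:

net_1k = Mobilenet_V2(num_classes=1000)
num_params = sum(p.numel() for p in net_1k.parameters())
print(f'{num_params / 1e6:.2f}M parameters')  # roughly 3.5M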
MobileNet V3
Uses the SE (squeeze-and-excitation) attention module, and many of the depthwise convolutions in the network replace 3x3 kernels with 5x5 kernels. The h-swish activation function speeds the network up while maintaining accuracy.
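For reference, h-swish replaces the sigmoid in swish with the piecewise-linear hard sigmoid, so it can be computed with a single ReLU6. A minimal sketch (PyTorch already ships this as nn.Hardswish / F.hardswish, which the implementation below uses directly):

import torch
from torch.nn import functional as F

def h_swish(x):
    # h-swish(x) = x * ReLU6(x + 3) / 6
    return x * F.relu6(x + 3.0) / 6.0

x = torch.randn(4)
print(torch.allclose(h_swish(x), F.hardswish(x)))  # True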
Based on the MobileNetV3-Large network structure, the implementation is as follows:
from typing import Callable, List, Optional
import torch
from torch import nn, Tensor
from torch.nn import functional as F
from functools import partial
def _make_divisible(in_channels, divisor=8, min_channel=None):
    # round the channel count to a multiple of divisor
    if min_channel is None:
        min_channel = divisor
    # round to the nearest multiple
    new_channel = max(min_channel, int(in_channels + divisor / 2) // divisor * divisor)
    # make sure rounding down does not reduce the channels by more than 10%
    if new_channel < 0.9 * in_channels:
        new_channel += divisor
    return new_channel
class ConvBNAct(nn.Sequential):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, group=1, act=None):
        padding = (kernel_size - 1) // 2
        norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01)
        if act is None:
            act = nn.ReLU6
        super(ConvBNAct, self).__init__(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
                      stride=stride, padding=padding, groups=group, bias=False),
            norm_layer(out_channels),
            act(inplace=True)
        )
class SE_Attention(nn.Module):
    def __init__(self, in_channels, squeeze_factor=4):
        super(SE_Attention, self).__init__()
        # two fc layers (implemented as 1x1 convs): the first squeezes to in_channels / 4,
        # the second expands back to in_channels
        squeeze_hidden = _make_divisible(in_channels // squeeze_factor, 8)
        self.fc1 = nn.Conv2d(in_channels, squeeze_hidden, 1)
        self.fc2 = nn.Conv2d(squeeze_hidden, in_channels, 1)

    def forward(self, x):
        scale = F.adaptive_avg_pool2d(x, output_size=(1, 1))
        scale = self.fc1(scale)
        scale = F.relu(scale, inplace=True)
        scale = self.fc2(scale)
        scale = F.hardsigmoid(scale, inplace=True)
        return scale * x
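# In short: global average pooling squeezes each channel to a single value, two 1x1 convs form the
# excitation bottleneck (channels/4, then back), and the hard-sigmoid output rescales x channel-wise.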
class InvertedResidualConfig:
    def __init__(self, in_channels, kernel_size, expand_channels, output_channels,
                 use_SE, act, stride, channels_factor_V2alpha):
        self.in_channels = self.adjust_channels(in_channels, channels_factor_V2alpha)
        self.kernel = kernel_size
        # channels after the 1x1 expansion conv (every bneck except the first expands)
        self.expand_channels = self.adjust_channels(expand_channels, channels_factor_V2alpha)
        self.output_channels = self.adjust_channels(output_channels, channels_factor_V2alpha)
        self.use_SE = use_SE
        self.use_HardSWISH = act == 'HardSWISH'
        self.stride = stride

    @staticmethod
    def adjust_channels(channels, channels_factor_V2alpha):
        return _make_divisible(channels * channels_factor_V2alpha, 8)
class InvertedResidual(nn.Module):
    def __init__(self, config: InvertedResidualConfig):
        super(InvertedResidual, self).__init__()
        self.use_residual_connect = (config.stride == 1 and config.in_channels == config.output_channels)
        layers = []
        activation = nn.Hardswish if config.use_HardSWISH else nn.ReLU
        # 1x1 expansion conv (skipped when expand_channels == in_channels, i.e. the first bneck)
        if config.expand_channels != config.in_channels:
            layers.append(ConvBNAct(
                config.in_channels,
                config.expand_channels,
                kernel_size=1,
                act=activation
            ))
        # depthwise conv
        layers.append(ConvBNAct(
            config.expand_channels,
            config.expand_channels,
            kernel_size=config.kernel,
            stride=config.stride,
            group=config.expand_channels,
            act=activation
        ))
        if config.use_SE:
            layers.append(SE_Attention(config.expand_channels))
        # 1x1 projection conv (linear, no activation)
        layers.append(ConvBNAct(config.expand_channels,
                                config.output_channels,
                                kernel_size=1,
                                act=nn.Identity))
        self.block = nn.Sequential(*layers)
        self.out_channels = config.output_channels

    def forward(self, x):
        result = self.block(x)
        if self.use_residual_connect:
            result += x
        return result
class MobilenetV3(nn.Module):
    def __init__(self,
                 inverted_residual_setting: List[InvertedResidualConfig],
                 last_channels,
                 num_classes):
        super(MobilenetV3, self).__init__()
        block = InvertedResidual
        # norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01)
        layers = []
        firstconv_output_channel = inverted_residual_setting[0].in_channels
        layers.append(ConvBNAct(3, firstconv_output_channel, kernel_size=3, stride=2, act=nn.Hardswish))
        # build the inverted residual blocks
        for config in inverted_residual_setting:
            layers.append(block(config))
        # build the last conv layer
        lastconv_input_channels = inverted_residual_setting[-1].output_channels
        lastconv_output_channels = 6 * lastconv_input_channels
        layers.append(ConvBNAct(lastconv_input_channels, lastconv_output_channels, kernel_size=1, act=nn.Hardswish))
        self.features = nn.Sequential(*layers)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        # classification head
        self.classifier = nn.Sequential(
            nn.Linear(lastconv_output_channels, last_channels),
            nn.Hardswish(inplace=True),
            nn.Dropout(p=0.2, inplace=True),
            nn.Linear(last_channels, num_classes)
        )
        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
def mobilenet_large(num_classes=1000):
    channels_factor_V2alpha = 1.0
    bneck_config = partial(InvertedResidualConfig, channels_factor_V2alpha=channels_factor_V2alpha)
    adjust_channels = partial(InvertedResidualConfig.adjust_channels, channels_factor_V2alpha=channels_factor_V2alpha)
    inverted_residual_setting = [
        # input_c, kernel, expanded_c, out_c, use_se, activation, stride
        bneck_config(16, 3, 16, 16, False, "RE", 1),
        bneck_config(16, 3, 64, 24, False, "RE", 2),  # C1
        bneck_config(24, 3, 72, 24, False, "RE", 1),
        bneck_config(24, 5, 72, 40, True, "RE", 2),  # C2
        bneck_config(40, 5, 120, 40, True, "RE", 1),
        bneck_config(40, 5, 120, 40, True, "RE", 1),
        bneck_config(40, 3, 240, 80, False, "HardSWISH", 2),  # C3
        bneck_config(80, 3, 200, 80, False, "HardSWISH", 1),
        bneck_config(80, 3, 184, 80, False, "HardSWISH", 1),
        bneck_config(80, 3, 184, 80, False, "HardSWISH", 1),
        bneck_config(80, 3, 480, 112, True, "HardSWISH", 1),
        bneck_config(112, 3, 672, 112, True, "HardSWISH", 1),
        bneck_config(112, 5, 672, 160, True, "HardSWISH", 2),  # C4
        bneck_config(160, 5, 960, 160, True, "HardSWISH", 1),
        bneck_config(160, 5, 960, 160, True, "HardSWISH", 1),
    ]
    last_channels = adjust_channels(1280)
    return MobilenetV3(inverted_residual_setting=inverted_residual_setting,
                       last_channels=last_channels,
                       num_classes=num_classes)
if __name__ == '__main__':
    a = torch.randn(1, 3, 224, 224)
    net = mobilenet_large(num_classes=5)
    out = net(a)
    print(out.shape)
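The same config machinery also covers MobileNetV3-Small. A sketch of that variant, using the bneck settings from the MobileNetV3 paper (the mobilenet_small function below is an added illustration, not part of the original implementation above):

def mobilenet_small(num_classes=1000):
    channels_factor_V2alpha = 1.0
    bneck_config = partial(InvertedResidualConfig, channels_factor_V2alpha=channels_factor_V2alpha)
    adjust_channels = partial(InvertedResidualConfig.adjust_channels, channels_factor_V2alpha=channels_factor_V2alpha)
    inverted_residual_setting = [
        # input_c, kernel, expanded_c, out_c, use_se, activation, stride
        bneck_config(16, 3, 16, 16, True, "RE", 2),
        bneck_config(16, 3, 72, 24, False, "RE", 2),
        bneck_config(24, 3, 88, 24, False, "RE", 1),
        bneck_config(24, 5, 96, 40, True, "HardSWISH", 2),
        bneck_config(40, 5, 240, 40, True, "HardSWISH", 1),
        bneck_config(40, 5, 240, 40, True, "HardSWISH", 1),
        bneck_config(40, 5, 120, 48, True, "HardSWISH", 1),
        bneck_config(48, 5, 144, 48, True, "HardSWISH", 1),
        bneck_config(48, 5, 288, 96, True, "HardSWISH", 2),
        bneck_config(96, 5, 576, 96, True, "HardSWISH", 1),
        bneck_config(96, 5, 576, 96, True, "HardSWISH", 1),
    ]
    last_channels = adjust_channels(1024)
    return MobilenetV3(inverted_residual_setting=inverted_residual_setting,
                       last_channels=last_channels,
                       num_classes=num_classes)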