MobileNet Model Implementation (PyTorch)

V2

import torch
from torch import nn


# From the official repo: returns the multiple of divisor closest to v
def _make_divisible(v, divisor, min_value=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    :param v: original channel count
    :param divisor: the value the result must be divisible by
    :param min_value: lower bound on the result (defaults to divisor)
    :return: the multiple of divisor closest to v, never more than 10% below v
    """
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that rounding down does not drop the value by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v
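
To make the rounding behavior concrete, here is a small illustration (not part of the original code) of what the function returns:

```python
# _make_divisible rounds to the nearest multiple of `divisor`, but never
# drops more than 10% below the original value and never below `min_value`.
print(_make_divisible(37, 8))  # 40: nearest multiple of 8
print(_make_divisible(27, 8))  # 32: rounding down to 24 would lose more than 10%, so it is bumped up
```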


class ConvBNReLU6(nn.Sequential):
    # When groups == in_channel, this is a depthwise (DW) convolution.
    def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, groups=1):
        padding = (kernel_size - 1) // 2
        super(ConvBNReLU6, self).__init__(
            nn.Conv2d(in_channel, out_channel, kernel_size, stride, padding, groups=groups, bias=False),
            nn.BatchNorm2d(out_channel),
            nn.ReLU6(inplace=True)
        )


class InvertedResidual(nn.Module):
    def __init__(self, in_channel, out_channel, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        self.use_shortcut = (stride == 1 and in_channel == out_channel)
        hidden_channel = in_channel * expand_ratio
        layers = []
        # If the block does not expand the channels (expand_ratio == 1), skip the first 1x1 expansion conv.
        if expand_ratio != 1:
            layers.append(ConvBNReLU6(in_channel, hidden_channel, kernel_size=1, stride=1, groups=1))
        # The last layer uses a linear activation instead of ReLU6, which in code simply means no activation function.
        layers.extend([ConvBNReLU6(hidden_channel, hidden_channel, kernel_size=3, stride=stride, groups=hidden_channel),
                       nn.Conv2d(hidden_channel, out_channel, kernel_size=1, stride=1, padding=0, groups=1, bias=False),
                       nn.BatchNorm2d(out_channel)])
        self.sequential = nn.Sequential(*layers)

    def forward(self, x):
        y = self.sequential(x)
        if self.use_shortcut:
            return y + x
        return y


class MobileNetV2(nn.Module):
    def __init__(self, num_classes=1000, alpha=1.0, round_nearest=8):
        super(MobileNetV2, self).__init__()
        # Each row: t = expansion factor, c = output channels, n = number of repeats, s = stride of the first block
        bottleneck_weight = [
            # t, c, n, s
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1]
        ]
        block = InvertedResidual
        # input_channel is the previous layer's output channel count (this layer's input); initialized to the output channels of the first conv.
        input_channel = _make_divisible(32 * alpha, round_nearest)
        layers = []
        layers.append(ConvBNReLU6(in_channel=3, out_channel=input_channel, kernel_size=3, stride=2))
        for t, c, n, s in bottleneck_weight:
            # output_channel: output channel count of this stage
            output_channel = _make_divisible(c * alpha, round_nearest)
            for i in range(n):
                # s is the stride of the first of the n bottlenecks in this stage; the remaining bottlenecks use stride 1.
                stride = s if i == 0 else 1
                layers.append(
                    block(in_channel=input_channel, out_channel=output_channel, stride=stride, expand_ratio=t))
                input_channel = output_channel
        last_channel = _make_divisible(1280 * alpha, round_nearest)
        layers.append(ConvBNReLU6(in_channel=output_channel, out_channel=last_channel, kernel_size=1, stride=1))
        layers.append(nn.AdaptiveAvgPool2d((1, 1)))
        layers.append(nn.Flatten())
        self.features = nn.Sequential(*layers)
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(last_channel, num_classes),
            # Softmax needs an explicit dim; omit this layer when training with nn.CrossEntropyLoss,
            # which already applies (log-)softmax internally.
            nn.Softmax(dim=1)
        )
        self._initialize_weights()

    def _initialize_weights(self):
        # Iterate over every module in the network
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x
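
A minimal smoke test for the V2 implementation above (this snippet is an illustration, not part of the original post; the class count of 5 and the 224x224 input are arbitrary choices):

```python
if __name__ == "__main__":
    model = MobileNetV2(num_classes=5, alpha=1.0)
    dummy = torch.randn(1, 3, 224, 224)  # one 224x224 RGB image
    out = model(dummy)
    print(out.shape)  # expected: torch.Size([1, 5])
```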

V3

from typing import Callable, List, Optional

import torch
from torch import nn, Tensor
from torch.nn import functional as F
from functools import partial


def _make_divisible(ch, divisor=8, min_ch=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    if min_ch is None:
        min_ch = divisor
    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_ch < 0.9 * ch:
        new_ch += divisor
    return new_ch


class ConvBNActivation(nn.Sequential):
    def __init__(self, input_channels, output_channels, kernel_size=3, stride=1, groups=1,
                 norm_layer: Optional[Callable[..., nn.Module]] = None,
                 activation_layer: Optional[Callable[..., nn.Module]] = None):
        padding = (kernel_size - 1) // 2
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if activation_layer is None:
            activation_layer = nn.ReLU6
        super(ConvBNActivation, self).__init__(
            nn.Conv2d(in_channels=input_channels, out_channels=output_channels, kernel_size=kernel_size, stride=stride,
                      padding=padding, groups=groups, bias=False), norm_layer(num_features=output_channels),
            activation_layer(inplace=True))


class SqueezeExcitation(nn.Module):
    def __init__(self, input_channels):
        super(SqueezeExcitation, self).__init__()
        hidden_channels = _make_divisible(input_channels // 4, 8)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc1 = nn.Conv2d(input_channels, hidden_channels, 1)
        self.fc2 = nn.Conv2d(hidden_channels, input_channels, 1)

    def forward(self, x):
        scale = self.pool(x)
        scale = self.fc1(scale)
        scale = F.relu(scale)
        scale = self.fc2(scale)
        scale = F.hardsigmoid(scale)  # the SE gate uses hard-sigmoid; hardswish would not bound the scale to [0, 1]
        return scale * x


class InvertedResidualConfig:
    # alpha: width multiplier that scales the channel counts
    def __init__(self, input_channels, kernel_size, exp_size, output_channels, use_se, activation_layer, stride,
                 alpha: float = 1.0):
        super(InvertedResidualConfig, self).__init__()
        self.input_channels = _make_divisible(input_channels * alpha, 8)
        self.output_channels = _make_divisible(output_channels * alpha, 8)
        self.kernel_size = kernel_size
        self.exp_size = _make_divisible(exp_size * alpha, 8)
        self.use_se = use_se
        self.activation_layer = activation_layer
        self.stride = stride
        self.alpha = alpha


class bneck(nn.Module):
    def __init__(self, config: InvertedResidualConfig):
        super(bneck, self).__init__()
        self.use_shortcut = config.stride == 1 and config.input_channels == config.output_channels
        self.layers = []
        if config.exp_size != config.input_channels:
            # 1x1 expansion conv with stride 1
            self.layers.append(ConvBNActivation(config.input_channels, config.exp_size, 1, 1, 1,
                                                activation_layer=config.activation_layer))
        self.layers.append(
            ConvBNActivation(config.exp_size, config.exp_size, config.kernel_size, config.stride, config.exp_size,
                             activation_layer=config.activation_layer))
        if config.use_se:
            self.layers.append(SqueezeExcitation(config.exp_size))
        # nn.Identity returns its input unchanged, i.e. no activation function
        self.layers.append(
            ConvBNActivation(config.exp_size, config.output_channels, 1, 1, 1, activation_layer=nn.Identity))
        self.layers = nn.Sequential(*self.layers)

    def forward(self, x):
        y = self.layers(x)
        if self.use_shortcut:
            return y + x
        return y


class MobileNetV3(nn.Module):
    def __init__(self, num_classes=1000):
        super(MobileNetV3, self).__init__()
        # MobileNetV3-Large settings: input_channels, kernel_size, exp_size, output_channels, use_se, activation, stride
        bneck_configs = [
            InvertedResidualConfig(16, 3, 16, 16, False, nn.ReLU6, 1),
            InvertedResidualConfig(16, 3, 64, 24, False, nn.ReLU6, 2),
            InvertedResidualConfig(24, 3, 72, 24, False, nn.ReLU6, 1),
            InvertedResidualConfig(24, 5, 72, 40, True, nn.ReLU6, 2),
            InvertedResidualConfig(40, 5, 120, 40, True, nn.ReLU6, 1),
            InvertedResidualConfig(40, 5, 120, 40, True, nn.ReLU6, 1),
            InvertedResidualConfig(40, 3, 240, 80, False, nn.Hardswish, 2),
            InvertedResidualConfig(80, 3, 200, 80, False, nn.Hardswish, 1),
            InvertedResidualConfig(80, 3, 184, 80, False, nn.Hardswish, 1),
            InvertedResidualConfig(80, 3, 184, 80, False, nn.Hardswish, 1),
            InvertedResidualConfig(80, 3, 480, 112, True, nn.Hardswish, 1),
            InvertedResidualConfig(112, 3, 672, 112, True, nn.Hardswish, 1),
            InvertedResidualConfig(112, 5, 672, 160, True, nn.Hardswish, 2),
            InvertedResidualConfig(160, 5, 960, 160, True, nn.Hardswish, 1),
            InvertedResidualConfig(160, 5, 960, 160, True, nn.Hardswish, 1),
        ]
        self.features = []
        self.features.append(ConvBNActivation(3, 16, 3, 2, 1, activation_layer=nn.Hardswish))
        for config in bneck_configs:
            self.features.append(bneck(config))
        self.features.append(ConvBNActivation(160, 960, 1, 1, 1, activation_layer=nn.Hardswish))
        self.features.append(nn.AdaptiveAvgPool2d((1, 1)))
        self.features = nn.Sequential(*self.features)
        self.classifier = nn.Sequential(
            nn.Linear(960, 1280),
            nn.Hardswish(inplace=True),
            nn.Dropout(p=0.2, inplace=True),
            nn.Linear(1280, num_classes)
        )
        self._initialize_weights()

    def _initialize_weights(self):
        # Iterate over every module in the network
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
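
A similar sanity check can be run on the V3 implementation (again illustrative, with an arbitrary class count):

```python
if __name__ == "__main__":
    model = MobileNetV3(num_classes=5)
    dummy = torch.randn(1, 3, 224, 224)  # one 224x224 RGB image
    out = model(dummy)
    print(out.shape)  # expected: torch.Size([1, 5])
```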

The steps for image classification with the MobileNet model provided by PyTorch are as follows:

1. Import the necessary libraries (PIL is used for image loading in step 4, so its import is included here)

```python
import torch
import torchvision
from torchvision import transforms
from PIL import Image
```

2. Load the MobileNet model

```python
model = torchvision.models.mobilenet_v2(pretrained=True)
model.eval()  # switch to inference mode so BatchNorm and Dropout behave correctly
```

This uses the pretrained MobileNet_v2 model shipped with PyTorch; other pretrained models can be selected as needed.

3. Define the preprocessing for the input image

```python
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
```

This chains several transforms: resize the shorter side of the image to 256, center-crop to 224x224, convert the image to a Tensor, and normalize it with the ImageNet mean and standard deviation.

4. Load the input image

```python
img = Image.open('test.jpg')
```

The Image module from the PIL library is used to load the image; any other loading method that yields a PIL image works as well.

5. Preprocess the input image

```python
img_tensor = preprocess(img)
```

This converts the input image to a Tensor and applies the preprocessing defined above.

6. Feed the image into the model for prediction

```python
with torch.no_grad():
    output = model(img_tensor.unsqueeze(0))
    pred = output.argmax(dim=1)
```

`unsqueeze(0)` adds the batch dimension before the tensor is passed to the model. The `torch.no_grad()` context manager disables gradient tracking to reduce memory usage during inference.

7. Print the prediction

```python
print('Predicted class:', pred.item())
```

This prints the predicted class index for the input image.

These are the basic steps for image classification with the MobileNet model provided by PyTorch.
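
As an optional extension of step 7 (not part of the original walkthrough; the variable names `probs`, `top5_prob`, and `top5_idx` are illustrative), the raw logits can be turned into probabilities and the five most likely classes printed:

```python
# Convert logits to probabilities and list the top-5 class indices with their confidence.
probs = torch.nn.functional.softmax(output[0], dim=0)
top5_prob, top5_idx = torch.topk(probs, 5)
for p, idx in zip(top5_prob, top5_idx):
    print(f"class {idx.item()}: {p.item():.4f}")
```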