import torch
import torch.nn as nn
from collections import OrderedDict

def _make_divisible(v, divisor, min_value=None):
    # Ensure every layer's channel count is divisible by 8
"""
This function is taken from the original tf repo.
It ensures that all layers have a channel number that is divisible by 8
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
:param v: # 通道数
:param divisor: 除数 8
:param min_value: 最小除数
:return:
"""
    if min_value is None:
        min_value = divisor  # 8
    # Round to the nearest multiple of divisor, e.g. v=32: max(8, int(32 + 8/2) // 8 * 8) = 32
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that rounding down does not go down by more than 10%.
    if new_v < 0.9 * v:  # if the rounded count fell below 90% of the original, add divisor (8)
        new_v += divisor
    return new_v
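
# A quick sanity check of the rounding rule (illustrative values; this snippet is
# not part of the original file). With divisor=8, a width-multiplied channel count
# is snapped to the nearest multiple of 8 and bumped up if rounding lost over 10%:
#   _make_divisible(32 * 1.0, 8)   # -> 32 (already a multiple of 8)
#   _make_divisible(32 * 0.35, 8)  # -> 16 (11.2 rounds to 8 < 0.9 * 11.2, so +8)
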
class ConvBNReLU(nn.Sequential):
    # Standard conv + BN + ReLU6 block; kernel size defaults to 3, stride to 1
def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        padding = (kernel_size - 1) // 2  # "same" padding for odd kernel sizes (floor division)
super(ConvBNReLU, self).__init__(
nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
nn.BatchNorm2d(out_planes),
nn.ReLU6(inplace=True)
)
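
# Illustrative shape check (not in the original file): MobileNetV2's stem uses this
# block with stride=2, which halves the spatial resolution, and the inverted
# residual block below reuses it with groups=hidden_dim as a depthwise conv.
#   ConvBNReLU(3, 32, stride=2)(torch.randn(1, 3, 224, 224)).shape
#   # -> torch.Size([1, 32, 112, 112])
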
class InvertedResidual(nn.Module):
    # Inverted residual block (the MobileNetV2 bottleneck)
def __init__(self, inp, oup, stride, expand_ratio):
        # input channels, output channels, stride, expansion ratio
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]
        hidden_dim = int(round(inp * expand_ratio))  # channel count of the intermediate (expanded) layer
        # Use the residual (skip) connection only when stride == 1 and the
        # input and output channel counts match
        self.use_res_connect = self.stride == 1 and inp == oup
layers = []
if expand_ratio != 1:
            # If the expansion ratio is not 1, expand first: a 1x1 pointwise conv
            # that widens the channel count by expand_ratio (6 in this network)
            # pw: expansion layer
            layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
        layers.extend([
            # dw: depthwise conv (groups == channels, input channels == output channels);
            # stride comes from the config; this does the spatial feature extraction
            ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
            # pw-linear: 1x1 projection back down to oup channels, with no activation (linear bottleneck)
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
])
self.conv = nn.Sequential(*layers)
def forward(self, x):
        if self.use_res_connect:  # residual connection: x + conv(x)
            return x + self.conv(x)
        else:  # otherwise no skip connection
return self.conv(x)
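
# Illustrative usage (not in the original file): with stride=1 and matching
# channel counts the block is residual, so the output shape equals the input shape.
#   block = InvertedResidual(24, 24, stride=1, expand_ratio=6)
#   block(torch.randn(1, 24, 56, 56)).shape  # -> torch.Size([1, 24, 56, 56])
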
class MobileNetV2(nn.Module):  # subclasses nn.Module (see the Module base class excerpt below)
def __init__(self,
num_classes=1000,
width_mult=1.0,
inverted_residual_setting=None,
round_nearest=8,
block=None):
        # 1000 classes; the width multiplier scales every layer's channel count (commonly 1.0 or 1.4)
"""
MobileNet V2 main class
Args:
num_classes (int): Number of classes
width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
inverted_residual_setting: Network structure
round_nearest (int): Round the number of channels in each layer to be a multiple of this number
Set to 1 to turn off rounding
block: Module specifying inverted residual building block for mobilenet
"""
super(MobileNetV2, self).__init__()
        if block is None:  # defaults to InvertedResidual
            block = InvertedResidual
        input_channel = 32   # input channels of the first conv layer
        last_channel = 1280  # output channels of the last conv layer
if inverted_residual_setting is None:
inverted_residual_setting = [
                # t, c, n, s: t = expansion factor, c = output channels,
                # n = number of repeats of this bottleneck, s = stride of the first repeat
                [1, 16, 1, 1],   # no expansion, 16 output channels, 1 repeat, stride 1
                [6, 24, 2, 2],   # 6x expansion, 24 output channels, 2 repeats, stride 2
                [6, 32, 3, 2],
                [6, 64, 4, 2],
                [6, 96, 3, 1],
                [6, 160, 3, 2],  # 6x expansion, 160 output channels, 3 repeats, stride 2
[6, 320, 1, 1],
]
# only check the first element, assuming user knows t,c,n,s are required
if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
raise ValueError("inverted_residual_setting should be non-empty "
"or a 4-element list, got {}".format(inverted_residual_setting))
# building first layer
input_channel = _make_divisible(input_channel * width_mult, round_nearest)
self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
        features = [ConvBNReLU(3, input_channel, stride=2)]  # first layer, stride 2
# building inverted residual blocks
        for t, c, n, s in inverted_residual_setting:  # iterate over the configuration rows
            # e.g. [6, 24, 2, 2]: expansion factor t, output channels c,
            # n repeats of this bottleneck, stride s
            output_channel = _make_divisible(c * width_mult, round_nearest)  # scaled, rounded output channels
            for i in range(n):  # repeat this row's block n times
                stride = s if i == 0 else 1  # only the first repeat uses the configured stride; the rest use 1
                # input channels, output channels, stride, expansion ratio
features.append(block(input_channel, output_channel, stride, expand_ratio=t))
input_channel = output_channel
        # building the last feature layer: a 1x1 conv from 320 to 1280 channels
features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
# make it nn.Sequential
self.features = nn.Sequential(*features)
        # building classifier
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(self.last_channel, num_classes),  # linear layer, 1280 -> 1000
)
        # weight initialization
        for m in self.modules():  # iterates over all submodules (stored internally in OrderedDicts)
            if isinstance(m, nn.Conv2d):  # conv layers
                nn.init.kaiming_normal_(m.weight, mode='fan_out')  # Kaiming-normal weights
                if m.bias is not None:
                    nn.init.zeros_(m.bias)  # zero bias
            elif isinstance(m, nn.BatchNorm2d):  # BN layers
                nn.init.ones_(m.weight)  # scale = 1
                nn.init.zeros_(m.bias)   # shift = 0
            elif isinstance(m, nn.Linear):  # linear layers
                nn.init.normal_(m.weight, 0, 0.01)  # normal(0, 0.01) weights
                nn.init.zeros_(m.bias)  # zero bias
def _forward_impl(self, x):
# This exists since TorchScript doesn't support inheritance, so the superclass method
# (this one) needs to have a name other than `forward` that can be accessed in a subclass
        x = self.features(x)    # torch.Size([1, 3, 224, 224]) -> torch.Size([1, 1280, 7, 7])
        x = x.mean([2, 3])      # global average pool over h, w -> torch.Size([1, 1280])
        x = self.classifier(x)  # torch.Size([1, 1000])
return x
def forward(self, x):
return self._forward_impl(x)
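
# Minimal end-to-end sketch (illustrative; the helper name _demo is ours, not part
# of the original file): build the default model and run a dummy forward pass to
# confirm the shapes annotated in _forward_impl above.
def _demo():
    model = MobileNetV2(num_classes=1000)
    model.eval()
    x = torch.randn(1, 3, 224, 224)
    with torch.no_grad():
        out = model(x)
    print(out.shape)  # torch.Size([1, 1000])
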
class Module(object):
    # The base building block of all neural networks (excerpt of the PyTorch source, for reference)
r"""Base class for all neural network modules.
Your models should also subclass this class.
Modules can also contain other Modules, allowing to nest them in
a tree structure. You can assign the submodules as regular attributes::
import torch.nn as nn
import torch.nn.functional as F
class Model(nn.Module):
def __init__(self):
super(Model, self).__init__()
self.conv1 = nn.Conv2d(1, 20, 5)
self.conv2 = nn.Conv2d(20, 20, 5)
def forward(self, x):
x = F.relu(self.conv1(x))
return F.relu(self.conv2(x))
Submodules assigned in this way will be registered, and will have their
parameters converted too when you call :meth:`to`, etc.
"""
dump_patches = False
r"""This allows better BC support for :meth:`load_state_dict`. In
:meth:`state_dict`, the version number will be saved as in the attribute
`_metadata` of the returned state dict, and thus pickled. `_metadata` is a
dictionary with keys that follow the naming convention of state dict. See
``_load_from_state_dict`` on how to use this information in loading.
If new parameters/buffers are added/removed from a module, this number shall
be bumped, and the module's `_load_from_state_dict` method can compare the
version number and do appropriate changes if the state dict is from before
the change."""
_version = 1
def __init__(self):
"""
Initializes internal Module state, shared by both nn.Module and ScriptModule.
"""
torch._C._log_api_usage_once("python.nn_module")
self.training = True
        self._parameters = OrderedDict()  # OrderedDict of this module's registered parameters
self._buffers = OrderedDict()
self._backward_hooks = OrderedDict()
self._forward_hooks = OrderedDict()
self._forward_pre_hooks = OrderedDict()
self._state_dict_hooks = OrderedDict()
self._load_state_dict_pre_hooks = OrderedDict()
self._modules = OrderedDict()
# For detailed explanations, see:
# https://zhuanlan.zhihu.com/p/33075914
# https://www.jianshu.com/p/47d4235292f7
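
# Illustrative check (not in the original file) of what those OrderedDicts hold:
# assigning an nn.Module attribute registers it in _modules, and the parameters
# of leaf layers live in their own _parameters dict.
#   m = MobileNetV2()
#   list(m._modules.keys())                    # ['features', 'classifier']
#   list(m.classifier[1]._parameters.keys())   # ['weight', 'bias']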