2.6.6 YOLOV5_ConvNeXt Code Annotations

The annotated code is as follows:

import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# 1. drop_path (stochastic depth): randomly drops whole samples from the residual branch
#    during training, which regularizes deep networks
#    drop_prob in [0, 1): the probability that a sample in the batch is dropped (skips the computation)
def drop_path(x, drop_prob: float = 0., training: bool = False, scale_by_keep: bool = True):

    # If the drop probability is 0 or the module is not in training mode, return the input unchanged
    if drop_prob == 0. or not training:
        return x

    # keep_prob: the probability that a sample in the batch is kept (participates in the computation)
    keep_prob = 1 - drop_prob

    # shape: one entry per sample and 1 for every other dimension,
    # e.g. for a 4-D input of shape [n, c, h, w] the mask shape is [n, 1, 1, 1]
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)

    # Draw a Bernoulli mask with success probability keep_prob: kept samples get 1, dropped samples get 0.
    # For example, with n = 3 samples where positions 0 and 2 are kept and position 1 is dropped,
    # the mask values are [1, 0, 1], broadcast over the remaining dimensions.
    random_tensor = x.new_empty(shape).bernoulli_(keep_prob)

    # If keep_prob > 0 and scaling is enabled, divide the mask by keep_prob so the expected value
    # of the output equals the input (e.g. with keep_prob = 0.5 the kept entries become 2)
    if keep_prob > 0.0 and scale_by_keep:
        random_tensor.div_(keep_prob)
    # x * random_tensor: sets every value of the dropped samples to 0 (so their gradients are 0)
    # and rescales the kept samples by 1 / keep_prob
    return x * random_tensor
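# A quick sanity check of drop_path (a hypothetical snippet, run it separately from this file):
#
#   x = torch.ones(1000, 8, 4, 4)
#   y = drop_path(x, drop_prob=0.25, training=True)
#   # About 25% of the 1000 samples become all zeros; the remaining samples are scaled
#   # by 1/0.75, so y.mean() stays close to 1 and y.shape equals x.shape.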


class DropPath(nn.Module):
    """
    Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).
    """

    def __init__(self, drop_prob=None, scale_by_keep=True):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob
        self.scale_by_keep = scale_by_keep

    def forward(self, x):
        return drop_path(x, self.drop_prob, self.training, self.scale_by_keep)

# Truncated-normal initialization: fills the tensor with values drawn from N(mean, std)
# restricted to the interval [a, b], using inverse-CDF sampling.
def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
    def _no_grad_trunc_normal_(tensor, mean, std, a, b):
        def norm_cdf(x):
            return (1. + math.erf(x / math.sqrt(2.))) / 2.

        with torch.no_grad():
            l = norm_cdf((a - mean) / std)
            u = norm_cdf((b - mean) / std)

            tensor.uniform_(2 * l - 1, 2 * u - 1)
            tensor.erfinv_()

            tensor.mul_(std * math.sqrt(2.))
            tensor.add_(mean)

            tensor.clamp_(min=a, max=b)
            return tensor
    return _no_grad_trunc_normal_(tensor, mean, std, a, b)
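# Example use of trunc_normal_ (a hypothetical snippet): with the defaults a=-2, b=2
# the sampled values never leave [-2, 2]; _init_weights below applies it with std=.02.
#
#   layer = nn.Linear(96, 384)
#   trunc_normal_(layer.weight, std=.02)
#   print(layer.weight.abs().max() <= 2.0)   # True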


# --------------------------------------#
#   2. GELU activation
#   implemented with the tanh approximation:
#   GELU(x) ≈ 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
# --------------------------------------#
class GELU(nn.Module):
    def __init__(self):
        super(GELU, self).__init__()

    def forward(self, x):
        return 0.5 * x * (1 + torch.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * torch.pow(x, 3))))
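# The tanh formula above is the standard approximation of GELU and stays close to the
# exact value. A quick comparison (a hypothetical snippet):
#
#   x = torch.linspace(-3, 3, 7)
#   print((GELU()(x) - F.gelu(x)).abs().max())   # on the order of 1e-3 or smaller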


# ---------------------------------------------------------------------------------#
#   LayerNorm supports two data formats: channels_last (default) or channels_first.
#   channels_last  corresponds to inputs of shape (batch_size, height, width, channels)
#   channels_first corresponds to inputs of shape (batch_size, channels, height, width)
# ---------------------------------------------------------------------------------#

# 3. LayerNorm normalizes each sample over its channel dimension,
#    whereas BatchNorm normalizes each channel over all samples in the batch.
class LayerNorm(nn.Module):
    # normalized_shape: the number of channels to normalize over (e.g. 96 for the stem output)
    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
        super().__init__()

        # learnable per-channel scale (gamma), one value per channel
        self.weight = nn.Parameter(torch.ones(normalized_shape))

        # learnable per-channel shift (beta)
        self.bias = nn.Parameter(torch.zeros(normalized_shape))

        # eps: a small positive constant added to the denominator to avoid division by zero
        self.eps = eps
        self.data_format = data_format

        if self.data_format not in ["channels_last", "channels_first"]:
            raise NotImplementedError
        self.normalized_shape = (normalized_shape,)

    def forward(self, x):

        # channels_last: normalize over the last dimension (channels) with PyTorch's built-in F.layer_norm
        if self.data_format == "channels_last":
            return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)


        elif self.data_format == "channels_first":
            # channels_first: apply the layer-norm formula manually over dim 1 (the channels)

            # mean over the channel dimension
            u = x.mean(1, keepdim=True)

            # variance over the channel dimension: the mean of (x - u)^2
            s = (x - u).pow(2).mean(1, keepdim=True)

            # normalize: (x - mean) / sqrt(var + eps)
            x = (x - u) / torch.sqrt(s + self.eps)

            # apply the learnable per-channel scale and shift, broadcast over H and W
            x = self.weight[:, None, None] * x + self.bias[:, None, None]
            return x
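# Equivalence check (a hypothetical snippet): the channels_first branch matches permuting
# to channels_last, applying the built-in layer norm, and permuting back.
#
#   x = torch.randn(2, 96, 8, 8)
#   ln_cf = LayerNorm(96, data_format="channels_first")
#   ln_cl = LayerNorm(96, data_format="channels_last")
#   y1 = ln_cf(x)
#   y2 = ln_cl(x.permute(0, 2, 3, 1)).permute(0, 3, 1, 2)
#   print(torch.allclose(y1, y2, atol=1e-5))   # True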

# --------------------------------------------------------------------------------------------------------------#
#   4. The ConvNeXt Block has two equivalent implementations:
#   (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
#   (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
#   Variant (2) is used here because it is slightly faster in PyTorch.
# --------------------------------------------------------------------------------------------------------------#
class Block(nn.Module):
    def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6):
        super().__init__()
        # --------------------------#
        #   7x7 depthwise convolution
        # --------------------------#
        self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim)
        self.norm = LayerNorm(dim, eps=1e-6)
        # --------------------------#
        #   1x1 convolution implemented as a Linear layer (expand to 4*dim)
        # --------------------------#
        self.pwconv1 = nn.Linear(dim, 4 * dim)
        self.act = GELU()
        # --------------------------#
        #   1x1 convolution implemented as a Linear layer (project back to dim)
        # --------------------------#
        self.pwconv2 = nn.Linear(4 * dim, dim)
        # --------------------------#
        #   learnable layer-scale coefficient (per-channel)
        # --------------------------#
        self.gamma = nn.Parameter(layer_scale_init_value * torch.ones(dim),
                                  requires_grad=True) if layer_scale_init_value > 0 else None
        # --------------------------#
        #   DropPath (stochastic depth) regularization on the residual branch
        # --------------------------#
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()


    def forward(self, x):
        input = x
        # --------------------------#
        #   7x7 depthwise convolution
        # --------------------------#
        x = self.dwconv(x)
        x = x.permute(0, 2, 3, 1)  # (N, C, H, W) -> (N, H, W, C)
        x = self.norm(x)
        # --------------------------#
        #   1x1 convolution implemented as a Linear layer (expand to 4*dim)
        # --------------------------#
        x = self.pwconv1(x)
        x = self.act(x)
        # --------------------------#
        #   1x1 convolution implemented as a Linear layer (project back to dim)
        # --------------------------#
        x = self.pwconv2(x)
        # --------------------------#
        #   apply the layer-scale coefficient
        # --------------------------#
        if self.gamma is not None:
            x = self.gamma * x
        x = x.permute(0, 3, 1, 2)  # (N, H, W, C) -> (N, C, H, W)
        # --------------------------#
        #   residual connection with DropPath (stochastic depth)
        # --------------------------#
        x = input + self.drop_path(x)
        return x
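# Shape check for a single ConvNeXt Block (a hypothetical snippet):
#
#   block = Block(dim=96, drop_path=0.1)
#   x = torch.randn(2, 96, 56, 56)
#   print(block(x).shape)   # torch.Size([2, 96, 56, 56]) -- the Block preserves shape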

# 5. The ConvNeXt backbone
class ConvNeXt(nn.Module):
    def __init__(
        self, in_chans=3, num_classes=1000, depths=[3, 3, 9, 3], dims=[96, 192, 384, 768],
        drop_path_rate=0., layer_scale_init_value=1e-6, head_init_scale=1., **kwargs
    ):
        super().__init__()
        self.downsample_layers = nn.ModuleList()
        # --------------------------------------------------#
        #   Stem: bs, 3, 224, 224 -> bs, 96, 56, 56
        # --------------------------------------------------#
        stem = nn.Sequential(
            # 3 input channels, 96 output channels, kernel size 4, stride 4:
            # the spatial resolution is reduced by a factor of 4
            nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4),
            # LayerNorm over the 96 output channels (channels_first layout)
            LayerNorm(dims[0], eps=1e-6, data_format="channels_first")
        )
        self.downsample_layers.append(stem)

        # --------------------------------------------------#
        #   Three downsampling layers:
        #   each is a 2x2 convolution with stride 2, so every downsampling step
        #   halves the spatial size and doubles the number of channels
        # --------------------------------------------------#
        for i in range(3):
            downsample_layer = nn.Sequential(
                LayerNorm(dims[i], eps=1e-6, data_format="channels_first"),
                nn.Conv2d(dims[i], dims[i+1], kernel_size=2, stride=2),
            )
            self.downsample_layers.append(downsample_layer)
        # --------------------------------------------------#
        #   Assign each Block its own drop-path rate, increasing with depth
        # --------------------------------------------------#
        self.stages = nn.ModuleList()
        dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
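        # For example (hypothetical values): with depths=[3, 3, 9, 3] and drop_path_rate=0.1,
        # dp_rates holds 18 values rising linearly from 0.0 to 0.1, so Blocks deeper in the
        # network are dropped more often than shallow ones.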
        #print(">>>>>",dp_rates)
        cur = 0
        # --------------------------------------------------#
        #   Besides the stem, ConvNeXt has four stages;
        #   each stage is a stack of several ConvNeXt Blocks.
        # --------------------------------------------------#
        for i in range(4):
            stage = nn.Sequential(
                *[Block(dim=dims[i], drop_path=dp_rates[cur + j], layer_scale_init_value=layer_scale_init_value) for
                  j in range(depths[i])]
            )
            self.stages.append(stage)
            cur += depths[i]
            #print(">>>>>>>>>>>>>>>>",cur)

        self.apply(self._init_weights)



    def _init_weights(self, m):
        # initialize conv/linear weights with a truncated normal (std 0.02) and zero the biases
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            trunc_normal_(m.weight, std=.02)
            nn.init.constant_(m.bias, 0)

    def forward(self, x):
        outs = []
        for i in range(4):
            x = self.downsample_layers[i](x)
            x = self.stages[i](x)
            # skip the first stage; collect the feature maps of the last three stages
            if i != 0:
                outs.append(x)
        return outs
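# Feature-map sanity check (a hypothetical snippet): for a 640x640 input the backbone
# returns features at strides 8, 16 and 32, the scales a YOLOv5 neck expects.
#
#   model = ConvNeXt()
#   feats = model(torch.randn(1, 3, 640, 640))
#   for f in feats:
#       print(f.shape)
#   # torch.Size([1, 192, 80, 80]), torch.Size([1, 384, 40, 40]), torch.Size([1, 768, 20, 20])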


if __name__ == "__main__":

    input = torch.randn(4, 3, 224, 224)
    ss = ConvNeXt()
    s1 = ss(input)
    # print the shapes of the three returned feature maps
    for feat in s1:
        print(feat.shape)