Resnet.Module代码解读/带看

Blalaaa

已于 2024-09-23 16:05:59 修改

阅读量897

点赞数 32

文章标签：卷积神经网络深度学习 pytorch 人工智能算法

于 2024-09-22 22:32:48 首次发布

本文链接：https://blog.csdn.net/Blalaaa/article/details/142445312

版权

Resnet结构图

1.Resnet.module完整代码

import torch.nn as nn
import torch

class BasicBlock(nn.Module):
    """Residual unit made of two 3x3 convolutions plus a shortcut connection.

    Main branch: conv3x3 -> BN -> ReLU -> conv3x3 -> BN. The shortcut (the
    input itself, or ``downsample(input)`` when a ``downsample`` module is
    given) is added to the main branch before the final ReLU.

    Args:
        in_channel: Number of channels of the incoming feature map.
        out_channel: Number of channels produced by both convolutions.
        stride: Stride of the first convolution; the second always uses 1.
        downsample: Optional module applied to the input to build the
            shortcut; ``None`` means the input is used unchanged.
        **kwargs: Accepted and ignored, so the block can be constructed with
            the same keyword arguments as ``Bottleneck`` (e.g. ``groups``).
    """

    # Output channels of the block are out_channel * expansion.
    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):
        super().__init__()
        # Only the first convolution may change the spatial resolution.
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        """Run the block on ``x`` and return the activated residual sum."""
        # Build the shortcut first, exactly as the main branch sees the raw input.
        shortcut = x if self.downsample is None else self.downsample(x)

        y = self.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))

        # Residual addition (in place on the freshly computed tensor).
        y += shortcut
        return self.relu(y)

class Bottleneck(nn.Module):
    """Three-layer bottleneck residual unit: 1x1 squeeze, 3x3, 1x1 expand.

    The block outputs ``out_channel * expansion`` channels. The ``stride`` is
    applied on the 3x3 convolution; both 1x1 convolutions use stride 1.
    The shortcut (the input itself, or ``downsample(input)`` when given) is
    added to the main branch before the final ReLU.

    Args:
        in_channel: Number of channels of the incoming feature map.
        out_channel: Base channel count; the block emits four times as many.
        stride: Stride of the middle 3x3 convolution.
        downsample: Optional module applied to the input to build the
            shortcut; ``None`` means the input is used unchanged.
        groups: Number of groups of the 3x3 convolution.
        width_per_group: Scales the inner width relative to a base of 64.
    """

    # Output channels of the block are out_channel * expansion.
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None,
                 groups=1, width_per_group=64):
        super().__init__()

        # Channel count used inside the block (after squeeze, before expand).
        width = int(out_channel * (width_per_group / 64.)) * groups
        expanded = out_channel * self.expansion

        # 1x1: squeeze channels
        self.conv1 = nn.Conv2d(in_channel, width, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width)
        # 3x3: (optionally grouped) spatial convolution carrying the stride
        self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride,
                               padding=1, groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(width)
        # 1x1: unsqueeze channels
        self.conv3 = nn.Conv2d(width, expanded, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(expanded)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Run the block on ``x`` and return the activated residual sum."""
        # Build the shortcut first, from the untouched input.
        shortcut = x if self.downsample is None else self.downsample(x)

        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))

        # Residual addition (in place on the freshly computed tensor).
        y += shortcut
        return self.relu(y)

class ResNet(nn.Module):
    """ResNet / ResNeXt network assembled from a caller-supplied block class.

    Layout: a 7x7 stride-2 stem convolution taking 3 input channels, batch
    norm, ReLU and a 3x3 stride-2 max-pool, followed by four stages of
    residual blocks with base widths 64, 128, 256 and 512 (stages 2-4 start
    with stride 2). When ``include_top`` is true, an adaptive average pool to
    1x1 and a linear classifier are appended.

    Args:
        block: Residual block class. It must expose an integer ``expansion``
            class attribute and accept ``(in_channel, out_channel, stride=,
            downsample=, groups=, width_per_group=)``.
        blocks_num: Sequence whose first four items give the number of blocks
            in stages 1-4.
        num_classes: Output size of the final linear layer.
        include_top: Whether to build and apply the pooling + classifier head.
        groups: Forwarded to every block as ``groups``.
        width_per_group: Forwarded to every block as ``width_per_group``.
    """

    def __init__(self,
                 block,
                 blocks_num,
                 num_classes=1000,
                 include_top=True,
                 groups=1,
                 width_per_group=64):
        super(ResNet, self).__init__()
        self.include_top = include_top
        # Running record of the channel count entering the next block;
        # _make_layer reads and updates it.
        self.in_channel = 64

        self.groups = groups
        self.width_per_group = width_per_group

        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)
        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Kaiming-normal initialisation for every convolution in the network.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage: ``block_num`` residual blocks chained in sequence.

        Args:
            block: Residual block class (see the class docstring).
            channel: Base channel count of the stage; each block outputs
                ``channel * block.expansion`` channels.
            block_num: Number of blocks in the stage.
            stride: Stride of the first block; later blocks use the block's
                default stride.

        Returns:
            An ``nn.Sequential`` holding the blocks in order.
        """
        out_channel = channel * block.expansion

        # The shortcut of the first block needs a 1x1 projection whenever the
        # main branch changes the resolution or the channel count.
        downsample = None
        if stride != 1 or self.in_channel != out_channel:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, out_channel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channel))

        layers = [block(self.in_channel,
                        channel,
                        downsample=downsample,
                        stride=stride,
                        groups=self.groups,
                        width_per_group=self.width_per_group)]
        self.in_channel = out_channel

        # The remaining blocks must use the same groups / width_per_group as
        # the first one; otherwise a ResNeXt configuration would fall back to
        # the block's default (ungrouped) settings after the first block.
        for _ in range(1, block_num):
            layers.append(block(self.in_channel,
                                channel,
                                groups=self.groups,
                                width_per_group=self.width_per_group))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the stem, the four stages and, if enabled, the classifier head.

        Returns the output of ``fc`` when ``include_top`` is true, otherwise
        the feature map produced by ``layer4``.
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        if self.include_top:
            x = self.avgpool(x)
            # Keep the batch dimension, flatten everything else for the linear layer.
            x = torch.flatten(x, 1)
            x = self.fc(x)

        return x

def resnet34(num_classes=1000, include_top=True):
    """Return a ``ResNet`` built from ``BasicBlock`` with stage depths 3, 4, 6, 3.

    ``num_classes`` and ``include_top`` are forwarded unchanged to ``ResNet``.
    Weights URL recorded by the original author:
    https://download.pytorch.org/models/resnet34-333f7ec4.pth
    """
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths,
                  include_top=include_top, num_classes=num_classes)

def resnet50(num_classes=1000, include_top=True):
    """Return a ``ResNet`` built from ``Bottleneck`` with stage depths 3, 4, 6, 3.

    ``num_classes`` and ``include_top`` are forwarded unchanged to ``ResNet``.
    Weights URL recorded by the original author:
    https://download.pytorch.org/models/resnet50-19c8e357.pth
    """
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths,
                  include_top=include_top, num_classes=num_classes)

def resnet101(num_classes=1000, include_top=True):
    """Return a ``ResNet`` built from ``Bottleneck`` with stage depths 3, 4, 23, 3.

    ``num_classes`` and ``include_top`` are forwarded unchanged to ``ResNet``.
    Weights URL recorded by the original author:
    https://download.pytorch.org/models/resnet101-5d3b4d8f.pth
    """
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths,
                  include_top=include_top, num_classes=num_classes)

def resnext50_32x4d(num_classes=1000, include_top=True):
    """Return a ``ResNet`` built from ``Bottleneck`` with 32 groups of width 4.

    Stage depths are 3, 4, 6, 3. ``num_classes`` and ``include_top`` are
    forwarded unchanged to ``ResNet``.
    Weights URL recorded by the original author:
    https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth
    """
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths,
                  num_classes=num_classes,
                  include_top=include_top,
                  groups=32,
                  width_per_group=4)

def resnext101_32x8d(num_classes=1000, include_top=True):
    """Return a ``ResNet`` built from ``Bottleneck`` with 32 groups of width 8.

    Stage depths are 3, 4, 23, 3. ``num_classes`` and ``include_top`` are
    forwarded unchanged to ``ResNet``.
    Weights URL recorded by the original author:
    https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth
    """
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths,
                  num_classes=num_classes,
                  include_top=include_top,
                  groups=32,
                  width_per_group=8)

2.解析

2.1导入必要的库

1.import torch.nn as nn
2.import torch

import torch.nn as nn
- 这行代码导入了 PyTorch 的 torch.nn 模块，并给它起了一个别名 nn。这样做是为了在后续代码中更方便地引用这个模块。torch.nn模块包含了构建神经网络所需的所有构件，比如层（layers）、激活函数（activation functions）和损失函数（loss functions）。
import torch
- 这行代码导入了 PyTorch 的核心模块 torch。这个模块提供了张量（tensors）的操作，这是 PyTorch 中最基本的数据结构，用于表示多维数组。它还提供了自动求导功能，这对于训练神经网络至关重要，因为它可以自动计算梯度。

2.2逻辑结构forward

首先，我们直接从class ResNet(nn.Module)(这是一个模型封装语句)开始，了解这个模型是如何构建与实现的：

在 PyTorch 中，forward 方法是定义神经网络模型的核心部分，它指定了模型如何处理输入数据并产生输出。这个方法是每个 nn.Module 子类必须实现的，它定义了数据通过网络的路径。

——模型里能直接表示模型结构的便是forward板块，它表示了模型的逻辑，以及数据流动转换过程，也称前向传播。与论文中的图像结构能够直观对应上：

def forward(self, x):
    # 应用第一个卷积层到输入数据x
    x = self.conv1(x)-----------------------------------conv1
    
    # 应用第一个批量归一化层到卷积层的输出
    x = self.bn1(x)
    
    # 应用ReLU激活函数到批量归一化层的输出
    x = self.relu(x)
    
    # 应用最大池化层以减少特征图的空间维度
    x = self.maxpool(x)---------------------------------max pool

    # 依次通过定义的四个层，每个层可能包含多个卷积层、激活函数等
    x = self.layer1(x)----------------------------------conv2_x
    x = self.layer2(x)----------------------------------conv3_x
    x = self.layer3(x)----------------------------------conv4_x
    x = self.layer4(x)----------------------------------conv5_x

    # 如果模型包含顶部结构（通常用于分类任务）
    if self.include_top:
        # 应用平均池化层以进一步减少特征图的空间维度
        x = self.avgpool(x)
        
        # 展平特征图，以便可以输入到全连接层
        x = torch.flatten(x, 1)
        
        # 应用全连接层以进行最终的分类
        x = self.fc(x)

    # 返回模型的输出
    return x

——简单理解：类，就是定义一个东西。

批量归一化，听说过正态分布吧，自然分布曲线。由于我们每次送入网络的只是一小批样本(mini-batch)，归一化就是使得每个特征通道的数据在这一批内均值为0，方差为1(许多假设前提：同一分布)

顶部结构，先记住分类啥的需要它就行了

ps.类、批量归一化、顶部结构可以参考主页‘深度学习疑难杂Q’（没有就是还没写，叭重要，慢慢学）

2.3定义层

了解了模型整体结构的实现，我们可能会有很多疑问，参数在哪儿？layer是啥？咋实现？尺寸怎么变换的？等等一系列问题。让我们一步一步来，假设我们在写代码，下一步应该就是去思考定义layer层了

这是来自某站的一位博主手撕resnet的代码：

(【ResNet残差神经网络硬核讲解(带你手撸ResNet代码)，从模型构建到训练、推理、可视化-哔哩哔哩】 ResNet残差神经网络硬核讲解(带你手撸ResNet代码)，从模型构建到训练、推理、可视化_哔哩哔哩_bilibili)

讲得很好，可以搭配食用。本文从拿到代码然后读，视频从头开始设计代码，某些细节不懂的可以看原视频。

make layer

def _make_layer(self, block, channel, block_num, stride=1):
        downsample = None------------------------------------------------我们看结构图设计会发现，outputsize在每层传递时逐渐减少，这通过下采样实现
        if stride != 1 or self.in_channel != channel * block.expansion:--？步长不为1，或输入通道数与扩展后的输出通道数不一致时，shortcut分支需要做匹配
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(channel * block.expansion))

        layers = []------------------------------------------------------存储残差块：conv2_x/3/4/5
        layers.append(block(self.in_channel,-----------------------------？添加第一个残差块
                            channel,
                            downsample=downsample,
                            stride=stride,
                            groups=self.groups,
                            width_per_group=self.width_per_group))
        self.in_channel = channel * block.expansion

        for _ in range(1, block_num):------------------------------------for循环追加本层(如conv2_x)剩余的block_num-1个残差块(第一个已在上面添加)
            layers.append(block(self.in_channel,
                                channel))

        return nn.Sequential(*layers)

——？你会发现50开始，第一个残差块中第一行为64，第三行为256，这就是通道数，他们不一致，所以要对通道数进行扩充-expansion倍数关系(模型设计的就是要进行扩充)。用if写在这里是因为：1️⃣并不是每一层都需要这一部分：resnet18/34的layer1步长为1且输入输出通道都是64，不会启用；layer2~4的步长为2，同样会启用；resnet50及以上由于64≠64×4，连layer1也会启用。2️⃣由于resnet的核心设计，即便我们不使用某一残差块(2/3/4)，例如直接从2->4我们可以直接通过扩充，符合下一层的输入通道数。严格地说，stride=1时这里只调整通道数，不算下采样。取啥名是个人的自由，不过不严谨

？残差块：

——重点：self：指向类实例/对象的引用。即在你的代码文件里自己定义的一个东西。可以随意定义，就像我的‘模型’叫做模型，不一定是你想的模型，或是计算机中定义的模型，但我就是引用了模型这个词。

downsample降/下采样指减少数据的空间维度（例如图像的宽度和高度）的过程，同时保持或压缩特征信息，可以有很多方式实现，有的会同时改变通道数，使用步长大于1的卷积核时，可以同时减少空间维度和改变通道数。例如resnet50的layer2中，downsample分支用步长为2的1x1卷积把256通道变成512通道，同时把特征图的宽高减半；而主分支上第一层输出的256通道在下一层变成128，则是Bottleneck里第一个1x1卷积压缩通道的结果

block：一个残差块的类或函数，用于构建残差网络的基本单元

append 用于向列表（list）的末尾添加一个元素，加的意思

nn.Conv2d nn.BatchNorm2d 卷积与批量归一化，2d表示二维数据

是不是有点懵，更多的东西出现了：self，block，还有一大堆没学过的函数。不要紧，请对照下面的注释尝试理解它们：

def _make_layer(self, block, channel, block_num, stride=1):
- 定义了一个方法 _make_layer，它接收以下参数：
  - self：指向类实例/对象的引用。
  - block：一个残差块的类或函数，用于构建残差网络的基本单元。
  - channel：当前层残差块的基准通道数（该层实际输出通道数为 channel * block.expansion）。
  - block_num：该层中残差块的数量。
  - stride：卷积层的步长，默认为 1。
downsample = None
- 初始化一个变量 downsample，用于存储可能需要的降采样操作。
if stride != 1 or self.in_channel != channel * block.expansion:
- 检查是否需要降采样。如果步长不为 1 或者当前输入通道数与扩展后的输出通道数不匹配，则需要降采样。
downsample = nn.Sequential()
- 创建一个 nn.Sequential 容器，用于存储降采样操作的层。
nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),
- 添加一个卷积层用于降采样，它将输入通道数转换为扩展后的输出通道数，使用 1x1 卷积核，设置步长和不带偏置。
nn.BatchNorm2d(channel * block.expansion))
- 添加一个批量归一化层，用于规范化降采样后的输出。
layers = []
- 初始化一个列表 layers，用于存储该层中的所有残差块。
layers.append(block(...))
- 向 layers 列表中添加第一个残差块，它可能包含降采样操作。
self.in_channel = channel * block.expansion
- 更新 self.in_channel 为当前层的输出通道数，以便后续残差块使用。
for _ in range(1, block_num):
- 通过一个循环添加剩余的残差块，从第二个开始，因为第一个已经添加。
layers.append(block(...))
- 向 layers 列表中添加后续的残差块。
return nn.Sequential(*layers)
- 返回一个 nn.Sequential 容器，它按顺序包含了所有的残差块。

请不要现在尝试去理解为什么要返回sequential，为啥顺序是这样的，为什么要把outputsize减少，output size和通道数啥关系(最后怎么都转换成向量了)，为什么50以上要扩充通道数又给它压回去…有些是比较重要的，有些有可能没有那么重要。咱先把大体理解清楚了，还有精力的话，一点点填充知识体系。

2.4Block

现在你会发现，残差块里面的结构你不知道如何实现的，而且通道数你可能对应不上，不知道它如何变化，因为resnet50及以上和resnet50以下的结构好像是不同的。不过这两部分各自之间的块又是相同的，好像得分开定义。

但实际上，如果block已知，除了初始化，整个resnet结构基本上就完成了。现在你不知道block是什么，于是找到以下两个板块(定义了两个类，这是因为这样可以很好地管理与操作；如果直接定义在resnet里，一旦哪里有报错或者需要修改、增添卷积，就十分不好调整，需要查看的东西特别多，结构不清晰。)：

BasicBlock

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):-初始化，定义每一个卷积
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,
                               kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,
                               kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):-------------------------------------------------------------定义结构，数据传递
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out += identity--------------------------------------------------------------核心
        out = self.relu(out)

        return out

Bottleneck

class Bottleneck(nn.Module):
    """
    注意：原论文中，在虚线残差结构的主分支上，第一个1x1卷积层的步距是2，第二个3x3卷积层步距是1。
    但在pytorch官方实现过程中是第一个1x1卷积层的步距是1，第二个3x3卷积层步距是2，
    这么做的好处是能够在top1上提升大概0.5%的准确率。
    可参考Resnet v1.5 <https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch>
    """
    expansion = 4------------------------------------------------------------------------------------扩充因子

    def __init__(self, in_channel, out_channel, stride=1, downsample=None,---------------------------定义卷积
                 groups=1, width_per_group=64):
        super(Bottleneck, self).__init__()

        width = int(out_channel * (width_per_group / 64.)) * groups

        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=width,
                               kernel_size=1, stride=1, bias=False)  # squeeze channels
        self.bn1 = nn.BatchNorm2d(width)
        # -----------------------------------------
        self.conv2 = nn.Conv2d(in_channels=width, out_channels=width, groups=groups,
                               kernel_size=3, stride=stride, bias=False, padding=1)
        self.bn2 = nn.BatchNorm2d(width)
        # -----------------------------------------
        self.conv3 = nn.Conv2d(in_channels=width, out_channels=out_channel*self.expansion,
                               kernel_size=1, stride=1, bias=False)  # unsqueeze channels
        self.bn3 = nn.BatchNorm2d(out_channel*self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):----------------------------------------------------------------------------逻辑过程，数据传递
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        out += identity
        out = self.relu(out)

        return out

2.5init初始化

参考以上两个类的定义，我们来到最后一部分，resnet的初始化：

至此，resnet的整体结构就看完啦（感动）

3.resnet的不同版本实现

def resnet34(num_classes=1000, include_top=True):
    """Return a ``ResNet`` built from ``BasicBlock`` with stage depths 3, 4, 6, 3.

    ``num_classes`` and ``include_top`` are forwarded unchanged to ``ResNet``.
    Weights URL recorded by the original author:
    https://download.pytorch.org/models/resnet34-333f7ec4.pth
    """
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths,
                  include_top=include_top, num_classes=num_classes)

def resnet50(num_classes=1000, include_top=True):
    """Return a ``ResNet`` built from ``Bottleneck`` with stage depths 3, 4, 6, 3.

    ``num_classes`` and ``include_top`` are forwarded unchanged to ``ResNet``.
    Weights URL recorded by the original author:
    https://download.pytorch.org/models/resnet50-19c8e357.pth
    """
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths,
                  include_top=include_top, num_classes=num_classes)

def resnet101(num_classes=1000, include_top=True):
    """Return a ``ResNet`` built from ``Bottleneck`` with stage depths 3, 4, 23, 3.

    ``num_classes`` and ``include_top`` are forwarded unchanged to ``ResNet``.
    Weights URL recorded by the original author:
    https://download.pytorch.org/models/resnet101-5d3b4d8f.pth
    """
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths,
                  include_top=include_top, num_classes=num_classes)

4.resnet的使用

更改全连接层，实现不同类别数的分类(默认1000类)

# Load a ResNet-50 together with its pretrained IMAGENET1K_V1 weights.
# NOTE(review): `models` is not imported in this snippet — presumably
# `torchvision.models`; confirm and add the import where this is used.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Replace the final fully connected layer to fit our own classification problem.
# The input width is read from the existing layer so the new head accepts the
# same features; the output width is the number of entries in `class_names`.
# NOTE(review): `class_names` is not defined in this snippet — presumably the
# collection of target labels; confirm against the caller.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(class_names))