Resnet结构图
1.Resnet.module完整代码
import torch.nn as nn
import torch
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions.

    Main branch: conv3x3 -> BN -> ReLU -> conv3x3 -> BN. The shortcut is
    added to the main branch and a final ReLU is applied to the sum.

    Args:
        in_channel: Number of channels of the input tensor.
        out_channel: Number of channels produced by both convolutions.
        stride: Stride of the first convolution (the second always uses 1).
        downsample: Optional module applied to the input to build the
            shortcut. When ``None`` the input itself is the shortcut.
        **kwargs: Extra keyword arguments are accepted and ignored.
    """

    # Output channels of the block = ``out_channel * expansion``.
    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):
        super().__init__()
        # Both convolutions share kernel size, padding and the absence of a
        # bias; only the stride differs.
        conv_opts = dict(kernel_size=3, padding=1, bias=False)

        self.conv1 = nn.Conv2d(in_channel, out_channel, stride=stride, **conv_opts)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()

        self.conv2 = nn.Conv2d(out_channel, out_channel, stride=1, **conv_opts)
        self.bn2 = nn.BatchNorm2d(out_channel)

        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Residual connection: the addition happens before the last ReLU.
        out += shortcut
        return self.relu(out)
class Bottleneck(nn.Module):
    """Three-convolution residual block: 1x1 -> 3x3 -> 1x1.

    The first 1x1 convolution maps the input to ``width`` channels, the
    (optionally grouped) 3x3 convolution carries the stride, and the last
    1x1 convolution maps to ``out_channel * expansion`` channels. The
    shortcut is added before the final ReLU.

    Args:
        in_channel: Number of channels of the input tensor.
        out_channel: Base channel count; the block outputs
            ``out_channel * expansion`` channels.
        stride: Stride of the 3x3 convolution.
        downsample: Optional module applied to the input to build the
            shortcut. When ``None`` the input itself is the shortcut.
        groups: Number of groups of the 3x3 convolution.
        width_per_group: Scales the inner width relative to a base of 64.
    """

    # Output channels of the block = ``out_channel * expansion``.
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None,
                 groups=1, width_per_group=64):
        super().__init__()

        # Inner width of the block; equals ``out_channel`` for the defaults.
        scale = width_per_group / 64.
        width = int(out_channel * scale) * groups
        expanded = out_channel * self.expansion

        # 1x1: squeeze channels.
        self.conv1 = nn.Conv2d(in_channel, width, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width)

        # 3x3: spatial convolution; this is where the stride is applied.
        self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride,
                               padding=1, groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(width)

        # 1x1: unsqueeze channels.
        self.conv3 = nn.Conv2d(width, expanded, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(expanded)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Residual connection: the addition happens before the last ReLU.
        out += shortcut
        return self.relu(out)
class ResNet(nn.Module):
    """ResNet-style network: stem, four residual stages, optional classifier.

    Args:
        block: Residual block class. It must expose an ``expansion`` class
            attribute and accept ``(in_channel, out_channel, stride=...,
            downsample=..., groups=..., width_per_group=...)``.
        blocks_num: Sequence of four ints, the number of blocks per stage.
        num_classes: Output size of the final linear layer.
        include_top: When true, global average pooling and the linear
            classifier are built and applied; otherwise ``forward`` returns
            the feature map of the last stage.
        groups: Forwarded to every block.
        width_per_group: Forwarded to every block.
    """

    def __init__(self,
                 block,
                 blocks_num,
                 num_classes=1000,
                 include_top=True,
                 groups=1,
                 width_per_group=64):
        super().__init__()
        self.include_top = include_top
        # Running channel count; ``_make_layer`` updates it after each stage.
        self.in_channel = 64

        self.groups = groups
        self.width_per_group = width_per_group

        # Stem: 7x7 stride-2 convolution on a 3-channel input, then a 3x3
        # stride-2 max pooling.
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages; every stage after the first halves the
        # spatial size through ``stride=2``.
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)

        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Kaiming-normal initialisation for every convolution in the model.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` residual blocks.

        Args:
            block: Residual block class (see the class docstring).
            channel: Base channel count of the stage; the stage outputs
                ``channel * block.expansion`` channels.
            block_num: Number of blocks in the stage.
            stride: Stride of the first block; later blocks use the block's
                default stride.

        Returns:
            An ``nn.Sequential`` holding the blocks in order.
        """
        out_channel = channel * block.expansion

        # The shortcut of the first block needs a projection whenever the
        # main branch changes the spatial size or the number of channels.
        downsample = None
        if stride != 1 or self.in_channel != out_channel:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, out_channel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channel))

        # Every block of the stage must receive the same grouping settings.
        # Previously only the first block did, so the remaining blocks fell
        # back to groups=1 / width_per_group=64.
        block_kwargs = dict(groups=self.groups, width_per_group=self.width_per_group)

        layers = [block(self.in_channel,
                        channel,
                        downsample=downsample,
                        stride=stride,
                        **block_kwargs)]
        self.in_channel = out_channel

        # Remaining blocks: input already has ``out_channel`` channels, so
        # they need neither stride nor projection.
        layers.extend(block(self.in_channel, channel, **block_kwargs)
                      for _ in range(1, block_num))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the stem, the four stages and, if enabled, the classifier."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        if self.include_top:
            x = self.avgpool(x)
            x = torch.flatten(x, 1)
            x = self.fc(x)

        return x
def resnet34(num_classes=1000, include_top=True):
    """Build a ``ResNet`` from ``BasicBlock`` with stage depths 3, 4, 6, 3.

    Args:
        num_classes: Forwarded to ``ResNet``.
        include_top: Forwarded to ``ResNet``.

    Returns:
        The constructed ``ResNet`` instance.
    """
    # <https://download.pytorch.org/models/resnet34-333f7ec4.pth>
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths,
                  num_classes=num_classes,
                  include_top=include_top)
def resnet50(num_classes=1000, include_top=True):
    """Build a ``ResNet`` from ``Bottleneck`` with stage depths 3, 4, 6, 3.

    Args:
        num_classes: Forwarded to ``ResNet``.
        include_top: Forwarded to ``ResNet``.

    Returns:
        The constructed ``ResNet`` instance.
    """
    # <https://download.pytorch.org/models/resnet50-19c8e357.pth>
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths,
                  num_classes=num_classes,
                  include_top=include_top)
def resnet101(num_classes=1000, include_top=True):
    """Build a ``ResNet`` from ``Bottleneck`` with stage depths 3, 4, 23, 3.

    Args:
        num_classes: Forwarded to ``ResNet``.
        include_top: Forwarded to ``ResNet``.

    Returns:
        The constructed ``ResNet`` instance.
    """
    # <https://download.pytorch.org/models/resnet101-5d3b4d8f.pth>
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths,
                  num_classes=num_classes,
                  include_top=include_top)
def resnext50_32x4d(num_classes=1000, include_top=True):
    """Build a grouped ``ResNet``: ``Bottleneck``, depths 3, 4, 6, 3.

    The network is created with ``groups=32`` and ``width_per_group=4``.

    Args:
        num_classes: Forwarded to ``ResNet``.
        include_top: Forwarded to ``ResNet``.

    Returns:
        The constructed ``ResNet`` instance.
    """
    # <https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth>
    grouping = dict(groups=32, width_per_group=4)
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths,
                  num_classes=num_classes,
                  include_top=include_top,
                  **grouping)
def resnext101_32x8d(num_classes=1000, include_top=True):
    """Build a grouped ``ResNet``: ``Bottleneck``, depths 3, 4, 23, 3.

    The network is created with ``groups=32`` and ``width_per_group=8``.

    Args:
        num_classes: Forwarded to ``ResNet``.
        include_top: Forwarded to ``ResNet``.

    Returns:
        The constructed ``ResNet`` instance.
    """
    # <https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth>
    grouping = dict(groups=32, width_per_group=8)
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths,
                  num_classes=num_classes,
                  include_top=include_top,
                  **grouping)
2.解析
2.1导入必要的库
1.import torch.nn as nn
2.import torch
import torch.nn as nn
- 这行代码导入了 PyTorch 的 torch.nn 模块,并给它起了一个别名 nn。这样做是为了在后续代码中更方便地引用这个模块。torch.nn 模块包含了构建神经网络所需的所有构件,比如层(layers)、激活函数(activation functions)和损失函数(loss functions)。
import torch
- 这行代码导入了 PyTorch 的核心模块 torch。这个模块提供了张量(tensors)的操作,这是 PyTorch 中最基本的数据结构,用于表示多维数组。它还提供了自动求导功能,这对于训练神经网络至关重要,因为它可以自动计算梯度。
2.2逻辑结构forward
首先,我们直接从 class ResNet(nn.Module)(这是一个模型封装语句)开始,了解这个模型是如何构建与实现的:
在 PyTorch 中,forward
方法是定义神经网络模型的核心部分,它指定了模型如何处理输入数据并产生输出。这个方法是每个 nn.Module
子类必须实现的,它定义了数据通过网络的路径。
——模型里能直接表示模型结构的便是forward板块,它表示了模型的逻辑,以及数据流动转换过程,也称前向传播。与论文中的图像结构能够直观对应上:
def forward(self, x):
# 应用第一个卷积层到输入数据x
x = self.conv1(x)-----------------------------------conv1
# 应用第一个批量归一化层到卷积层的输出
x = self.bn1(x)
# 应用ReLU激活函数到批量归一化层的输出
x = self.relu(x)
# 应用最大池化层以减少特征图的空间维度
x = self.maxpool(x)---------------------------------max pool
# 依次通过定义的四个层,每个层可能包含多个卷积层、激活函数等
x = self.layer1(x)----------------------------------conv2_x
x = self.layer2(x)----------------------------------conv3_x
x = self.layer3(x)----------------------------------conv4_x
x = self.layer4(x)----------------------------------conv5_x
# 如果模型包含顶部结构(通常用于分类任务)
if self.include_top:
# 应用平均池化层以进一步减少特征图的空间维度
x = self.avgpool(x)
# 展平特征图,以便可以输入到全连接层
x = torch.flatten(x, 1)
# 应用全连接层以进行最终的分类
x = self.fc(x)
# 返回模型的输出
return x
——简单理解:类,就是定义一个东西。
批量归一化,听说过正态分布吧,自然分布曲线,由于我们的样本小,归一化就是使得每个特征通道的数据均值为0,方差为1(许多假设前提:同一分布)
顶部结构,先记住分类啥的需要它就行了
ps.类、批量归一化、顶部结构可以参考主页‘深度学习疑难杂Q’(没有就是还没写,叭重要,慢慢学)
2.3定义层
了解了模型整体结构的实现,我们可能会有很多疑问,参数在哪儿?layer是啥?咋实现?尺寸怎么变换的?等等一系列问题。让我们一步一步来,假设我们在写代码,下一步应该就是去思考定义layer层了
这是来自某站的一位博主手撕resnet的代码:
(【ResNet残差神经网络硬核讲解(带你手撸ResNet代码),从模型构建到训练、推理、可视化-哔哩哔哩】 ResNet残差神经网络硬核讲解(带你手撸ResNet代码),从模型构建到训练、推理、可视化_哔哩哔哩_bilibili)
讲得很好,可以搭配食用。本文从拿到代码然后读,视频从头开始设计代码,某些细节不懂的可以看原视频。
make layer
def _make_layer(self, block, channel, block_num, stride=1):
downsample = None------------------------------------------------我们看结构图设计会发现,outputsize在每层传递时逐渐减少,这通过下采样实现
if stride != 1 or self.in_channel != channel * block.expansion:--?这是针对resnet50及以上设计的
downsample = nn.Sequential(
nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(channel * block.expansion))
layers = []------------------------------------------------------存储残差块:conv2_x/3/4/5
layers.append(block(self.in_channel,-----------------------------?添加第一个残差块
channel,
downsample=downsample,
stride=stride,
groups=self.groups,
width_per_group=self.width_per_group))
self.in_channel = channel * block.expansion
for _ in range(1, block_num):------------------------------------for循环添加当前层(例如conv2_x)中剩余的残差块,第一个残差块已在上面添加
layers.append(block(self.in_channel,
channel))
return nn.Sequential(*layers)
——?你会发现从resnet50开始,第一个残差块中第一行为64,第三行为256,这就是通道数,它们不一致,所以要对通道数进行扩充——expansion倍数关系(模型设计的就是要进行扩充)。用if写在这里是因为:1️⃣对layer1(conv2_x)来说,只有resnet50及以上才会因为通道数不一致而启用这一部分;而layer2~layer4由于stride=2,所有版本都会启用。2️⃣由于resnet的核心设计,即便我们不使用某一残差块(2/3/4),例如直接从2->4,我们也可以直接通过扩充,使其符合下一层的输入通道数。严格来说,只改变通道数而不改变尺寸时,这里不叫下采样。取啥名是个人的自由,不过不严谨。
?残差块:
——重点:self
:指向类实例/对象的引用。即在你的代码文件里自己定义的一个东西。可以随意定义,就像我的‘模型’叫做模型,不一定是你想的模型,或是计算机中定义的模型,但我就是引用了模型这个词。
downsample降/下采样指减少数据的空间维度(例如图像的宽度和高度)的过程,同时保持或压缩特征信息,可以有很多方式实现,有的会同时改变通道数,使用步长大于1的卷积核时,可以同时减少空间维度和改变通道数。例如resnet50中,layer1输出256通道,进入layer2时,捷径分支上的downsample(步长为2的1x1卷积)会把它变成512通道、尺寸减半,以便与主分支的输出相加;而主分支里256变成128,则是Bottleneck第一个1x1卷积压缩通道的结果。
block:一个残差块的类或函数,用于构建残差网络的基本单元
append 用于向列表(list)的末尾添加一个元素,加的意思
nn.Conv2d nn.BatchNorm2d 卷积与批量归一化,2d表示二维数据
是不是有点懵,更多的东西出现了:self,block,还有一大堆没学过的函数。不要紧,请对照下面的注释尝试理解它们:
def _make_layer(self, block, channel, block_num, stride=1):
- 定义了一个方法 _make_layer,它接收以下参数:
  - self:指向类实例/对象的引用。
  - block:一个残差块的类或函数,用于构建残差网络的基本单元。
  - channel:当前层的通道数。
  - block_num:该层中残差块的数量。
  - stride:卷积层的步长,默认为 1。
downsample = None
- 初始化一个变量 downsample,用于存储可能需要的降采样操作。
if stride != 1 or self.in_channel != channel * block.expansion:
- 检查是否需要降采样。如果步长不为 1 或者当前输入通道数与扩展后的输出通道数不匹配,则需要降采样。
downsample = nn.Sequential()
- 创建一个 nn.Sequential 容器,用于存储降采样操作的层。
nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),
- 添加一个卷积层用于降采样,它将输入通道数转换为扩展后的输出通道数,使用 1x1 卷积核,设置步长和不带偏置。
nn.BatchNorm2d(channel * block.expansion))
- 添加一个批量归一化层,用于规范化降采样后的输出。
layers = []
- 初始化一个列表 layers,用于存储该层中的所有残差块。
layers.append(block(...))
- 向 layers 列表中添加第一个残差块,它可能包含降采样操作。
self.in_channel = channel * block.expansion
- 更新 self.in_channel 为当前层的输出通道数,以便后续残差块使用。
for _ in range(1, block_num):
- 通过一个循环添加剩余的残差块,从第二个开始,因为第一个已经添加。
layers.append(block(...))
- 向 layers 列表中添加后续的残差块。
return nn.Sequential(*layers)
- 返回一个 nn.Sequential 容器,它按顺序包含了所有的残差块。
请不要现在尝试去理解为什么要返回sequential,为啥顺序是这样的,为什么要把outputsize减少,output size和通道数啥关系(最后怎么都转换成向量了),为什么50以上要扩充通道数又给它压回去…有些是比较重要的,有些有可能没有那么重要。咱先把大体理解清楚了,还有精力的话,一点点填充知识体系。
2.4Block
现在你会发现,残差块里面的结构你不知道如何实现的,而且通道数你可能对应不上,不知道它如何变化,因为resnet50及以上和resnet50以下的结构好像是不同的。不过这两部分各自之间的块又是相同的,好像得分开定义。
但实际上,如果block已知,除了初始化,整个resnet结构基本上就完成了。现在你不知道block是什么,于是找到以下两个板块(定义了两个类,这是因为这样可以很好地管理与操作;如果直接定义在resnet里,一旦哪里报错或者需要修改、增添卷积,就十分不好调整,需要查看的东西特别多,结构不清晰。):
BasicBlock
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):-初始化,定义每一个卷积
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,
kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(out_channel)
self.relu = nn.ReLU()
self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,
kernel_size=3, stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(out_channel)
self.downsample = downsample
def forward(self, x):-------------------------------------------------------------定义结构,数据传递
identity = x
if self.downsample is not None:
identity = self.downsample(x)
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out += identity--------------------------------------------------------------核心
out = self.relu(out)
return out
Bottleneck
class Bottleneck(nn.Module):
"""
注意:原论文中,在虚线残差结构的主分支上,第一个1x1卷积层的步距是2,第二个3x3卷积层步距是1。
但在pytorch官方实现过程中是第一个1x1卷积层的步距是1,第二个3x3卷积层步距是2,
这么做的好处是能够在top1上提升大概0.5%的准确率。
可参考Resnet v1.5 <https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch>
"""
expansion = 4------------------------------------------------------------------------------------扩充因子
def __init__(self, in_channel, out_channel, stride=1, downsample=None,---------------------------定义卷积
groups=1, width_per_group=64):
super(Bottleneck, self).__init__()
width = int(out_channel * (width_per_group / 64.)) * groups
self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=width,
kernel_size=1, stride=1, bias=False) # squeeze channels
self.bn1 = nn.BatchNorm2d(width)
# -----------------------------------------
self.conv2 = nn.Conv2d(in_channels=width, out_channels=width, groups=groups,
kernel_size=3, stride=stride, bias=False, padding=1)
self.bn2 = nn.BatchNorm2d(width)
# -----------------------------------------
self.conv3 = nn.Conv2d(in_channels=width, out_channels=out_channel*self.expansion,
kernel_size=1, stride=1, bias=False) # unsqueeze channels
self.bn3 = nn.BatchNorm2d(out_channel*self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
def forward(self, x):----------------------------------------------------------------------------逻辑过程,数据传递
identity = x
if self.downsample is not None:
identity = self.downsample(x)
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out += identity
out = self.relu(out)
return out
2.5init初始化
参考以上两个类的定义,我们来到最后一部分,resnet的初始化:
至此,resnet的整体结构就看完啦(感动)
3.resnet的不同版本实现
def resnet34(num_classes=1000, include_top=True):
    """Build a ``ResNet`` from ``BasicBlock`` with stage depths 3, 4, 6, 3.

    Args:
        num_classes: Forwarded to ``ResNet``.
        include_top: Forwarded to ``ResNet``.

    Returns:
        The constructed ``ResNet`` instance.
    """
    # <https://download.pytorch.org/models/resnet34-333f7ec4.pth>
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths,
                  num_classes=num_classes,
                  include_top=include_top)
def resnet50(num_classes=1000, include_top=True):
    """Build a ``ResNet`` from ``Bottleneck`` with stage depths 3, 4, 6, 3.

    Args:
        num_classes: Forwarded to ``ResNet``.
        include_top: Forwarded to ``ResNet``.

    Returns:
        The constructed ``ResNet`` instance.
    """
    # <https://download.pytorch.org/models/resnet50-19c8e357.pth>
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths,
                  num_classes=num_classes,
                  include_top=include_top)
def resnet101(num_classes=1000, include_top=True):
    """Build a ``ResNet`` from ``Bottleneck`` with stage depths 3, 4, 23, 3.

    Args:
        num_classes: Forwarded to ``ResNet``.
        include_top: Forwarded to ``ResNet``.

    Returns:
        The constructed ``ResNet`` instance.
    """
    # <https://download.pytorch.org/models/resnet101-5d3b4d8f.pth>
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths,
                  num_classes=num_classes,
                  include_top=include_top)
4.resnet的使用
更改全连接层,实现不同类别数的分类(默认1000类)
# Load a pretrained ResNet-50 model.
# NOTE(review): `models` is not defined in this snippet — presumably
# `from torchvision import models`; confirm before running.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
# Replace the final fully connected layer so its output size matches the
# number of target classes.
# NOTE(review): `class_names` is not defined here — presumably the list of
# class labels of the target dataset; confirm against the calling code.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(class_names))