一、input stem
# Excerpt (presumably from forward()): run the input through the input stem.
if self.deep_stem:  # input stem is three 3x3 conv blocks
    x = self.stem(x)
else:  # input stem is a single 7x7 conv block
    x = self.conv1(x)
    x = self.norm1(x)
    x = self.relu(x)
# The max-pool follows either stem variant (the stem down-samples twice:
# once in its first conv and once here).
x = self.maxpool(x)
def __init__(self,
             deep_stem=False  # use three 3x3 convs instead of one 7x7 conv
             ):
    """Constructor excerpt: record which input-stem variant is used.

    Args:
        deep_stem: If True the input stem is three 3x3 conv blocks,
            otherwise a single 7x7 conv block.
    """
    self.deep_stem = deep_stem
其中,self.stem、self.conv1、self.norm1、self.relu、self.maxpool都是在_make_stem_layer函数中构建的。
"""_make_stem_layer函数在__init__函数中被调用,input stem中的各层在初始化时就已经被创建好了"""
# Called from within __init__, so the layers of the input stem already exist
# once the model object has been constructed.
self._make_stem_layer(in_channels, stem_channels)
def _make_stem_layer(self, in_channels, stem_channels):
    """Build the input stem.

    The stem down-samples twice: once in its first conv (the first 3x3 conv
    or the 7x7 conv, both with stride 2) and once in the max-pool.

    Args:
        in_channels: Number of channels of the network input.
        stem_channels: Number of channels produced by the stem.
    """
    if self.deep_stem:  # choose between three 3x3 convs and one 7x7 conv
        # Three 3x3 conv blocks (conv + norm + ReLU).
        self.stem = nn.Sequential(
            build_conv_layer(
                self.conv_cfg,
                in_channels,
                stem_channels // 2,  # in_channels -> stem_channels // 2
                kernel_size=3,
                stride=2,  # down-sampling
                padding=1,
                bias=False),
            build_norm_layer(self.norm_cfg, stem_channels // 2)[1],
            nn.ReLU(inplace=True),
            build_conv_layer(
                self.conv_cfg,
                stem_channels // 2,
                stem_channels // 2,  # channel count unchanged
                kernel_size=3,
                stride=1,
                padding=1,
                bias=False),
            build_norm_layer(self.norm_cfg, stem_channels // 2)[1],
            nn.ReLU(inplace=True),
            build_conv_layer(
                self.conv_cfg,
                stem_channels // 2,  # stem_channels // 2 -> stem_channels
                stem_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=False),
            build_norm_layer(self.norm_cfg, stem_channels)[1],
            nn.ReLU(inplace=True))
    else:
        # One 7x7 conv block (conv + norm + ReLU).
        self.conv1 = build_conv_layer(
            self.conv_cfg,
            in_channels,
            stem_channels,
            kernel_size=7,
            stride=2,  # down-sampling
            padding=3,
            bias=False)
        # The norm layer is registered under the name returned by
        # build_norm_layer rather than assigned as a plain attribute.
        self.norm1_name, norm1 = build_norm_layer(
            self.norm_cfg, stem_channels, postfix=1)
        self.add_module(self.norm1_name, norm1)
        self.relu = nn.ReLU(inplace=True)
    # Max-pool layer, built for both stem variants (second down-sampling).
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
def __init__(self,
             in_channels=3,  # input channels of the input stem
             stem_channels=None,  # output channels of the input stem
             base_channels=64,  # base channel count; fallback for stem_channels
             conv_cfg=None,  # config used to build conv layers
             norm_cfg=dict(type='BN', requires_grad=True)):  # config used to build norm layers
    """Constructor excerpt: resolve the stem width and build the input stem.

    Args:
        in_channels: Input channels of the input stem.
        stem_channels: Output channels of the input stem; when None it
            falls back to ``base_channels``.
        base_channels: Base channel count.
        conv_cfg: Config used to build conv layers.
        norm_cfg: Config used to build norm layers.
    """
    # If stem_channels is not given, it defaults to base_channels
    # (not in_channels).
    if stem_channels is None:
        stem_channels = base_channels
    self.stem_channels = stem_channels
    self._make_stem_layer(in_channels, stem_channels)
二、res layer
forward函数中调用res layer的代码
# Excerpt from forward(): run the res layers in order and collect outputs.
outs = []
for i, layer_name in enumerate(self.res_layers):
    # self.res_layers stores the *names* of the res layers; fetch the
    # module registered under that name.
    res_layer = getattr(self, layer_name)
    x = res_layer(x)
    if i in self.out_indices:  # keep the feature map of each requested stage
        outs.append(x)
res layer的构建代码如下:
"""这段代码位于__init__函数中,与input stem一样,res layer同样在初始化时被创建好
resnet共有4个res layer(stage),其中每个stage中有多个block,其类型为Bottleneck或者BasicBlock,这些信息在arch_settings中被定义
在第2、3、4个stage中进行下采样,仅发生在这些stage中的第1个block中,在原始的Bottleneck中,下采样发生在第一个卷积层(Conv1X1)中,
在ResNet-B中将下采样改到了第二个卷积层(Conv3X3)中,4个stage的分辨率变化分别为1/4、1/8、1/16、1/32,下采样倍数由strides定义
resnet50中的channel变化为3(in_channels)--input stem-->64(stem_channels)
--stage1-->256(base_channels*2^0*expansion)
--stage2-->512(base_channels*2^1*expansion)
--stage3-->1024(base_channels*2^2*expansion)
--stage4-->2048(base_channels*2^3*expansion)
"""
# Excerpt from __init__: build one res layer (stage) per entry of
# self.stage_blocks and register it as a sub-module.
self.res_layers = []
for i, num_blocks in enumerate(self.stage_blocks):  # one res layer per iteration
    stride = strides[i]  # stride handed to this res layer
    dilation = dilations[i]
    dcn = self.dcn if self.stage_with_dcn[i] else None
    if plugins is not None:
        stage_plugins = self.make_stage_plugins(plugins, i)
    else:
        stage_plugins = None
    planes = base_channels * 2**i
    res_layer = self.make_res_layer(
        block=self.block,  # BasicBlock or Bottleneck
        inplanes=self.inplanes,  # input channels of the layer
        planes=planes,  # base channels; the layer outputs planes * expansion
        num_blocks=num_blocks,  # number of blocks in the res layer
        stride=stride,  # down-sampling happens in the first block
        dilation=dilation,
        style=self.style,
        avg_down=self.avg_down,
        with_cp=with_cp,
        conv_cfg=conv_cfg,  # conv config
        norm_cfg=norm_cfg,  # norm config
        dcn=dcn,
        plugins=stage_plugins,
        init_cfg=block_init_cfg)
    # Input channels of the next res layer.
    self.inplanes = planes * self.block.expansion
    layer_name = f'layer{i + 1}'
    self.add_module(layer_name, res_layer)  # register the res layer as a sub-module
    self.res_layers.append(layer_name)  # remember the res layer's name
# Supported depths: depth -> (block class, number of blocks per stage).
arch_settings = {
    # BasicBlock variants
    18: (BasicBlock, (2, 2, 2, 2)),
    34: (BasicBlock, (3, 4, 6, 3)),
    # Bottleneck variants
    50: (Bottleneck, (3, 4, 6, 3)),
    101: (Bottleneck, (3, 4, 23, 3)),
    152: (Bottleneck, (3, 8, 36, 3)),
}
def __init__(self,
             depth,
             base_channels=64,
             strides=(1, 2, 2, 2),  # stride of the first block of each res layer
             dilations=(1, 1, 1, 1),
             style='pytorch',
             avg_down=False,
             conv_cfg=None,
             norm_cfg=dict(type='BN', requires_grad=True),
             norm_eval=True,
             dcn=None,
             stage_with_dcn=(False, False, False, False),
             plugins=None,
             with_cp=False,
             ):
    """Constructor excerpt: validate ``depth`` and store the configuration.

    Raises:
        KeyError: If ``depth`` is not a key of ``arch_settings``.
    """
    if depth not in self.arch_settings:
        raise KeyError(f'invalid depth {depth} for resnet')
    block_init_cfg = None
    self.base_channels = base_channels
    self.style = style
    self.avg_down = avg_down
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.with_cp = with_cp
    self.norm_eval = norm_eval
    self.dcn = dcn
    self.stage_with_dcn = stage_with_dcn
    if dcn is not None:
        # NOTE(review): num_stages is not declared in this excerpt --
        # presumably a constructor parameter that was elided; confirm.
        assert len(stage_with_dcn) == num_stages
    self.plugins = plugins
    # Block class and per-stage block counts for the requested depth.
    self.block, stage_blocks = self.arch_settings[depth]
    self.stage_blocks = stage_blocks[:num_stages]
    # NOTE(review): stem_channels is not declared in this excerpt either --
    # presumably the (resolved) stem width from the constructor; confirm.
    self.inplanes = stem_channels
def make_stage_plugins(self, plugins, stage_idx):
    """Select the plugin configs that apply to one stage.

    Each plugin config is shallow-copied and its optional ``'stages'`` entry
    is removed from the copy. A plugin is kept when it has no ``'stages'``
    entry or when ``stages[stage_idx]`` is truthy.

    Args:
        plugins: Iterable of plugin config dicts.
        stage_idx: Index of the stage being built.

    Returns:
        list: Copies of the selected plugin configs, without ``'stages'``.
    """
    stage_plugins = []
    for plugin in plugins:
        plugin = plugin.copy()  # leave the caller's config untouched
        stages = plugin.pop('stages', None)
        assert stages is None or len(stages) == self.num_stages
        # whether to insert plugin into current stage
        if stages is None or stages[stage_idx]:
            stage_plugins.append(plugin)
    return stage_plugins
def make_res_layer(self, **kwargs):
    """Build one res layer by forwarding all keyword arguments to ``ResLayer``."""
    return ResLayer(**kwargs)
"""构建res layer的底层代码"""
class ResLayer(Sequential):
    """A res layer (stage): a sequence of ``num_blocks`` residual blocks.

    Args:
        block: Block class to instantiate.
        inplanes: Input channels of the layer.
        planes: Base channels passed to the blocks.
        num_blocks: Number of blocks in the res layer.
        stride: Stride of the one block that down-samples.
        avg_down: If True, the shortcut down-samples with an AvgPool2d and
            its 1x1 conv uses stride 1 (the original notes point to
            ResNet-D).
        conv_cfg: Config for building conv layers.
        norm_cfg: Config for building norm layers.
        downsample_first: If True the first block carries the stride and
            the shortcut, otherwise the last block does.
        **kwargs: Forwarded unchanged to every block.
    """

    def __init__(self,
                 block,
                 inplanes,
                 planes,
                 num_blocks,  # number of blocks in the res layer
                 stride=1,  # down-sampling factor, applied in a single block
                 avg_down=False,  # whether the shortcut uses AvgPool2d
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 downsample_first=True,  # down-sample in the first block?
                 **kwargs):
        self.block = block
        downsample = None
        # Build the shortcut branch. It is handed only to the one block that
        # changes resolution/channels; the other blocks get no `downsample`.
        # A projection is needed when (1) the main path down-samples, or
        # (2) the main path changes the number of channels.
        # Two variants: (1) a single 1x1 conv that both down-samples and
        # changes channels; (2) AvgPool2d down-samples, 1x1 conv changes
        # channels.
        if stride != 1 or inplanes != planes * block.expansion:
            downsample = []
            conv_stride = stride  # stride of the 1x1 conv
            if avg_down:
                # AvgPool2d does the down-sampling, so the 1x1 conv keeps
                # stride 1.
                conv_stride = 1
                downsample.append(
                    nn.AvgPool2d(
                        kernel_size=stride,
                        stride=stride,  # down-sampling
                        ceil_mode=True,  # round the output size up
                        count_include_pad=False))  # exclude zero padding from the mean
            downsample.extend([
                build_conv_layer(  # 1x1 conv
                    conv_cfg,
                    inplanes,
                    planes * block.expansion,  # change the channel count
                    kernel_size=1,
                    stride=conv_stride,  # 1 with AvgPool2d, otherwise `stride`
                    bias=False),  # no bias because a norm layer follows
                build_norm_layer(norm_cfg, planes * block.expansion)[1]
            ])
            downsample = nn.Sequential(*downsample)  # the shortcut

        layers = []
        # Build the res layer.
        if downsample_first:  # the ResNet case
            # Channels inside one block: inplanes -> planes -> planes ->
            # planes * expansion. Different stages use different
            # inplanes/planes; within a stage, inplanes changes from the
            # second block on.
            layers.append(
                block(
                    inplanes=inplanes,
                    planes=planes,
                    stride=stride,
                    downsample=downsample,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    **kwargs))
            inplanes = planes * block.expansion  # inplanes of the following blocks
            for _ in range(1, num_blocks):
                layers.append(
                    block(
                        inplanes=inplanes,
                        planes=planes,
                        stride=1,
                        conv_cfg=conv_cfg,
                        norm_cfg=norm_cfg,
                        **kwargs))
        else:  # downsample_first=False is for HourglassModule
            for _ in range(num_blocks - 1):
                layers.append(
                    block(
                        inplanes=inplanes,
                        planes=inplanes,
                        stride=1,
                        conv_cfg=conv_cfg,
                        norm_cfg=norm_cfg,
                        **kwargs))
            layers.append(
                block(
                    inplanes=inplanes,
                    planes=planes,
                    stride=stride,
                    downsample=downsample,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    **kwargs))
        super(ResLayer, self).__init__(*layers)
三、参数初始化
"""这段代码位于__init__函数中,用来初始化参数
准确说,这段代码用来设置init_cfg,真正的初始化代码位于BaseModule中
"""
# Excerpt from __init__: choose the initialisation config.
block_init_cfg = None
# init_cfg and pretrained must not both be given.
assert not (init_cfg and pretrained), \
    'init_cfg and pretrained cannot be specified at the same time'
if isinstance(pretrained, str):  # pretrained is a checkpoint path
    # `pretrained` is deprecated and kept for backward compatibility only.
    warnings.warn('DeprecationWarning: pretrained is deprecated, '
                  'please use "init_cfg" instead')
    # A given `pretrained` is still translated into an init_cfg; configuring
    # init_cfg directly is the recommended way.
    self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
elif pretrained is None:
    if init_cfg is None:  # neither pretrained nor init_cfg was given
        self.init_cfg = [
            dict(type='Kaiming', layer='Conv2d'),  # Kaiming init for conv layers
            dict(
                type='Constant',
                val=1,
                layer=['_BatchNorm', 'GroupNorm'])  # constant 1 for norm layers
        ]
        block = self.arch_settings[depth][0]
        # When set, the last norm layer of every block is constant-initialised
        # with 0 (the original notes cite "Bag of Tricks for Image
        # Classification with Convolutional Neural Networks").
        if self.zero_init_residual:
            if block is BasicBlock:
                block_init_cfg = dict(
                    type='Constant',
                    val=0,
                    override=dict(name='norm2'))
            elif block is Bottleneck:
                block_init_cfg = dict(
                    type='Constant',
                    val=0,
                    override=dict(name='norm3'))
else:
    raise TypeError('pretrained must be a str or None')
def __init__(self,
             zero_init_residual=True,  # zero-init the last norm layer of each block
             pretrained=None,  # checkpoint path; deprecated
             init_cfg=None  # initialisation config
             ):
    """Constructor excerpt: parameters related to weight initialisation.

    ``pretrained`` and ``init_cfg`` default to None: a parameter without a
    default may not follow one with a default, and None is the value the
    initialisation logic checks for when neither is given.
    """
    self.zero_init_residual = zero_init_residual
四、冻结参数
"""当使用预训练模型进行训练时,我们可以将使用预训练模型初始化的参数冻结,对其他参数微调,可以节省显存,加快训练速度
_freeze_stages函数在__init__函数中被调用
"""
# Called from within __init__: freeze the configured stages right after the
# layers have been built.
self._freeze_stages()
def _freeze_stages(self):
    """Freeze the input stem and the first ``self.frozen_stages`` res layers.

    Freezing does two things to a module: (1) ``eval()`` switches it to
    evaluation mode, and (2) ``requires_grad = False`` on its parameters
    stops gradient computation for them.
    """
    if self.frozen_stages >= 0:  # any value >= 0 freezes the input stem
        if self.deep_stem:
            self.stem.eval()  # (1)
            for param in self.stem.parameters():  # (2)
                param.requires_grad = False
        else:
            self.norm1.eval()  # (1)
            for m in [self.conv1, self.norm1]:  # (2)
                for param in m.parameters():
                    param.requires_grad = False

    # Res layers are registered as layer1, layer2, ...; freeze the first
    # `frozen_stages` of them. The range is empty when frozen_stages < 1.
    for i in range(1, self.frozen_stages + 1):
        m = getattr(self, f'layer{i}')
        m.eval()  # (1)
        for param in m.parameters():
            param.requires_grad = False  # (2)
五、重写train()
当我们训练模型时,会调用model.train()。
如果不重写train()函数,BN层和DropOut层在_freeze_stages函数中的设置将会被覆盖。
def train(self, mode=True):
    """Switch the training mode while keeping frozen parts frozen.

    After the parent class has applied ``mode``, the frozen stages are
    frozen again, and -- when training with ``self.norm_eval`` set -- every
    batch-norm module is put back into eval mode.

    Args:
        mode: True for training mode, False for evaluation mode.
    """
    super(ResNet, self).train(mode)
    # The parent call resets the mode of every sub-module, so re-apply the
    # freezing.
    self._freeze_stages()
    if mode and self.norm_eval:
        for m in self.modules():
            # trick: eval has an effect on BatchNorm only
            if isinstance(m, _BatchNorm):
                m.eval()