Backbone:最主要的就是用来提特征(VGG,ResNet),可以改的参数d深度(深度越深提取效果可能越好),w宽度(参数量越多提取效果可能更好),r输入大小(输入分辨率增加h和w扩充特征图参数也会越多)。
EfficientNet包含B0-B7版本,随着数字会依次增大。网络的特征图个数、层数、输入分辨率都会对结果产生影响。
单独提升这些指标都能使得效果提升,但是会遇到瓶颈。其中FLOPs为计算量(浮点运算次数)不是权重参数,例如卷积计算量=H*W*K*K*M*N,卷积核K*K输入M(input channel)个特征图输出为N(output channel)个特征图,HW为输出长宽,就是H*W个输出窗口,每个窗口的计算量都是卷积K*K*input channel*output channel。
综合提升指标,用参数搜索的方法得出结论。
基本网络架构B0
Depthwise卷积(深度卷积)
普通卷积的参数会很多因为一个filter要考虑所有的输入(算蓝、黄、橙的内积再加上偏置项,每个卷积核和输入的每个通道都内积运算)3个卷积核得到3个特征图;而深度卷积让卷积核和输入通道一对一进行内积运算,同样得到3个特征图。虽然提取的特征少了但是参数量成倍减少。卷积运算希望输出的特征图翻倍,提取特征多,因此有了深度可分离卷积。每个输入通道独立地进行卷积操作。
Pointwise卷积(逐点卷积)
之后再连多个1*1卷积增加了输出特征图个数。1*1卷积核节省了参数同时增大了输出特征图。
计算流程
width是每一层特征图个数,depth是指要堆叠多少次模块,res是指分辨率大小,dropout是进行dropout的概率
MBConv
MBConv流程图
class EfficientNet(nn.Module):
    """
    Feature-extraction backbone wrapper around a pretrained EfficientNet.

    modified by Zylo117

    The classification head (conv head, final BN, pooling, dropout, FC) is
    deleted so only the stem and the MBConv block stack remain. ``forward``
    returns the intermediate feature maps captured just before each spatial
    downsampling step — the multi-scale features a detector neck (e.g. BiFPN)
    consumes.
    """

    def __init__(self, compound_coef, load_weights=False):
        """
        :param compound_coef: EfficientNet scaling coefficient (0-7 -> b0-b7)
        :param load_weights: forwarded to ``EffNet.from_pretrained``
        """
        super(EfficientNet, self).__init__()
        model = EffNet.from_pretrained(f'efficientnet-b{compound_coef}', load_weights)
        # Strip the classification head -- only the feature extractor is kept.
        del model._conv_head
        del model._bn1
        del model._avg_pooling
        del model._dropout
        del model._fc
        self.model = model

    def forward(self, x):
        """
        :param x: input image tensor of shape (N, C, H, W)
        :return: list of feature maps recorded right before each stride-2
                 block (plus the final block's output), with the first
                 (highest-resolution) map dropped.
        """
        # Stem: conv -> BN -> swish.
        # NOTE(review): removed leftover debug print(x.shape) calls here.
        x = self.model._conv_stem(x)
        x = self.model._bn0(x)
        x = self.model._swish(x)
        feature_maps = []

        # TODO: temporarily storing extra tensor last_x and del it later might not be a good idea,
        # try recording stride changing when creating efficientnet,
        # and then apply it here.
        last_x = None
        for idx, block in enumerate(self.model._blocks):
            # drop-connect randomly drops residual branches during training;
            # the rate ramps up linearly with block depth.
            drop_connect_rate = self.model._global_params.drop_connect_rate
            if drop_connect_rate:
                drop_connect_rate *= float(idx) / len(self.model._blocks)
            x = block(x, drop_connect_rate=drop_connect_rate)

            # When a block downsamples (depthwise stride [2, 2]), the tensor
            # produced *before* it is the last map at the previous resolution,
            # so record last_x; the very last block's output is kept as well.
            if block._depthwise_conv.stride == [2, 2]:
                feature_maps.append(last_x)
            elif idx == len(self.model._blocks) - 1:
                feature_maps.append(x)
            last_x = x
        del last_x

        return feature_maps[1:]
class MBConvBlock(nn.Module):
    """
    Mobile Inverted Residual Bottleneck Block
    Args:
        block_args (namedtuple): BlockArgs, see above
        global_params (namedtuple): GlobalParam, see above
    Attributes:
        has_se (bool): Whether the block contains a Squeeze and Excitation layer.
    """

    def __init__(self, block_args, global_params):
        super().__init__()
        self._block_args = block_args
        # PyTorch BN uses 1 - (TF-style momentum).
        self._bn_mom = 1 - global_params.batch_norm_momentum
        self._bn_eps = global_params.batch_norm_epsilon
        se_ratio = block_args.se_ratio
        self.has_se = se_ratio is not None and 0 < se_ratio <= 1
        self.id_skip = block_args.id_skip  # skip connection and drop connect

        # Static or dynamic same-padding convolution depending on image size.
        Conv2d = get_same_padding_conv2d(image_size=global_params.image_size)

        # Expansion phase: 1x1 conv widening channels by expand_ratio
        # (e.g. MBConv6 expands 32 -> 192 before projecting back down).
        in_ch = block_args.input_filters
        mid_ch = in_ch * block_args.expand_ratio
        if block_args.expand_ratio != 1:
            self._expand_conv = Conv2d(in_channels=in_ch, out_channels=mid_ch, kernel_size=1, bias=False)
            self._bn0 = nn.BatchNorm2d(num_features=mid_ch, momentum=self._bn_mom, eps=self._bn_eps)

        # Depthwise convolution phase: one filter per channel.
        self._depthwise_conv = Conv2d(
            in_channels=mid_ch, out_channels=mid_ch, groups=mid_ch,  # groups makes it depthwise
            kernel_size=block_args.kernel_size, stride=block_args.stride, bias=False)
        self._bn1 = nn.BatchNorm2d(num_features=mid_ch, momentum=self._bn_mom, eps=self._bn_eps)

        # Squeeze-and-Excitation: 1x1 reduce/expand pair for channel attention.
        if self.has_se:
            squeezed_ch = max(1, int(in_ch * se_ratio))
            self._se_reduce = Conv2d(in_channels=mid_ch, out_channels=squeezed_ch, kernel_size=1)
            self._se_expand = Conv2d(in_channels=squeezed_ch, out_channels=mid_ch, kernel_size=1)

        # Output phase: 1x1 projection to the block's output channel count.
        out_ch = block_args.output_filters
        self._project_conv = Conv2d(in_channels=mid_ch, out_channels=out_ch, kernel_size=1, bias=False)
        self._bn2 = nn.BatchNorm2d(num_features=out_ch, momentum=self._bn_mom, eps=self._bn_eps)
        self._swish = MemoryEfficientSwish()

    def forward(self, inputs, drop_connect_rate=None):
        """
        :param inputs: input tensor
        :param drop_connect_rate: drop connect rate (float, between 0 and 1)
        :return: output of block
        """
        h = inputs
        # Expansion is skipped when expand_ratio == 1 (i.e. MBConv1).
        if self._block_args.expand_ratio != 1:
            h = self._swish(self._bn0(self._expand_conv(inputs)))

        # Depthwise convolution.
        h = self._swish(self._bn1(self._depthwise_conv(h)))

        # Squeeze-and-Excitation: global-pool each map to 1x1 (position is
        # discarded, only per-channel statistics remain), pass through the
        # reduce/expand bottleneck, then rescale the feature maps by the
        # resulting sigmoid attention weights.
        if self.has_se:
            squeezed = F.adaptive_avg_pool2d(h, 1)
            squeezed = self._se_expand(self._swish(self._se_reduce(squeezed)))
            h = torch.sigmoid(squeezed) * h

        # Project back down to the output channel count (no activation here).
        h = self._bn2(self._project_conv(h))

        # Residual connection (with optional drop-connect) only when spatial
        # size and channel count are both unchanged.
        input_filters = self._block_args.input_filters
        output_filters = self._block_args.output_filters
        if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters:
            if drop_connect_rate:
                h = drop_connect(h, p=drop_connect_rate, training=self.training)
            h = h + inputs  # skip connection
        return h