MobileNet V2
Innovations:
1. Improves the bottleneck from ResNet by introducing the inverted residual structure: a 1x1 conv first expands the channels, a 3x3 depthwise separable conv filters them, and a 1x1 conv projects back down (a minimal sketch of this channel flow follows this list).
2. The final 1x1 conv does not use ReLU (the linear bottleneck).
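A minimal sketch of the channel flow through one inverted residual block, with illustrative sizes (24 channels, expand ratio 6) and BatchNorm omitted for brevity; the full implementation follows:

import torch
from torch import nn
from torch.nn import functional as F

x = torch.randn(1, 24, 56, 56)                                     # (N, 24, H, W) input
expand = nn.Conv2d(24, 144, kernel_size=1)                         # 1x1 conv expands 24 -> 144 (expand_ratio=6)
dwise = nn.Conv2d(144, 144, kernel_size=3, padding=1, groups=144)  # 3x3 depthwise conv, one filter per channel
project = nn.Conv2d(144, 24, kernel_size=1)                        # 1x1 linear projection back to 24, no ReLU
out = x + project(F.relu6(dwise(F.relu6(expand(x)))))              # shortcut: stride=1 and channels match
print(out.shape)                                                   # torch.Size([1, 24, 56, 56])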
MobileNet V2 code implementation
import torch
import torch.nn as nn
def _make_divisible(in_channels, divisor=8, min_channel=None):
    # round the channel count to a multiple of divisor
    if min_channel is None:
        min_channel = divisor
    # round to the nearest multiple
    new_channel = max(min_channel, int(in_channels + divisor / 2) // divisor * divisor)
    # make sure rounding down does not reduce the channels by more than 10%
    if new_channel < 0.9 * in_channels:
        new_channel += divisor
    return new_channel
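# Illustrative values (not from the original post): _make_divisible(37) returns 40 (nearest multiple of 8),
# while _make_divisible(11) returns 16, because rounding down to 8 would lose more than 10% of the channels.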
class ConvBNRelu(nn.Sequential):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, groups=1):
        padding = (kernel_size - 1) // 2
        super(ConvBNRelu, self).__init__(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, groups=groups, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU6(inplace=True)
        )
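# Note: when groups == in_channels (and in_channels == out_channels), the Conv2d above acts as a
# depthwise convolution; InvertResidual below relies on exactly this for its 3x3 depthwise layer.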
class InvertResidual(nn.Module):
    def __init__(self, in_channels, out_channels, stride, expand_ratio):
        super(InvertResidual, self).__init__()
        hidden_channels = in_channels * expand_ratio
        self.use_shortcut = stride == 1 and in_channels == out_channels
        layers = []
        if expand_ratio != 1:
            # when expand_ratio != 1 the block starts with a 1x1 expansion conv;
            # when it equals 1 (the first bottleneck), this 1x1 conv is omitted
            layers.append(ConvBNRelu(in_channels, hidden_channels, kernel_size=1))
        # extend appends several layers at once (like repeated append)
        layers.extend([
            # 3x3 depthwise conv
            ConvBNRelu(hidden_channels, hidden_channels, kernel_size=3, stride=stride, groups=hidden_channels),
            # 1x1 conv (linear, no activation)
            nn.Conv2d(hidden_channels, out_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_channels)
        ])
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        if self.use_shortcut:
            return x + self.conv(x)
        else:
            return self.conv(x)
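# Illustrative shapes: InvertResidual(24, 24, stride=1, expand_ratio=6) maps (N, 24, H, W)
# through 24 -> 144 -> 144 -> 24 channels and adds the shortcut; with stride=2 or a channel
# change, use_shortcut is False and only self.conv(x) is returned.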
class Mobilenet_V2(nn.Module):
    # alpha: width multiplier that scales the number of channels in every conv layer
    def __init__(self, num_classes, alpha=1.0, round_nearest=8):
        super(Mobilenet_V2, self).__init__()
        block = InvertResidual
        # round the stem and last-layer channel counts to multiples of 8
        input_channels = _make_divisible(32 * alpha, round_nearest)
        last_channels = _make_divisible(1280 * alpha, round_nearest)
        invert_residual_setting = [
            # t, c, n, s
            # t: expansion factor (how much the channels are widened)
            # c: output channels of the bottleneck
            # n: how many times the bottleneck is repeated
            # s: stride of the first bottleneck in the group
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]
        features = []
        # conv1
        features.append(ConvBNRelu(in_channels=3, out_channels=input_channels, stride=2))
        # inverted residual bottlenecks
        for t, c, n, s in invert_residual_setting:
            output_channels = _make_divisible(c * alpha, round_nearest)
            for i in range(n):
                stride = s if i == 0 else 1
                features.append(block(input_channels, output_channels, stride, expand_ratio=t))
                input_channels = output_channels
        # 1x1 conv
        features.append(ConvBNRelu(input_channels, last_channels, kernel_size=1))
        # combine feature layers
        self.features = nn.Sequential(*features)
        # classifier head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # target output H, W
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(last_channels, num_classes)
        )
        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


if __name__ == '__main__':
    a = torch.randn(1, 3, 224, 224)
    net = Mobilenet_V2(num_classes=5)
    out = net(a)
    print(out.shape)
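As a quick sanity check on model size (a sketch, not part of the original post), counting the parameters of the 1000-class, alpha=1.0 configuration should land around 3.5M, in line with the roughly 3.4M reported in the MobileNetV2 paper:

net_1k = Mobilenet_V2(num_classes=1000)
num_params = sum(p.numel() for p in net_1k.parameters())
print(f'{num_params / 1e6:.2f}M parameters')  # roughly 3.5M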
MobileNet V3
Uses the SE (squeeze-and-excitation) attention module, and many of the depthwise convolutions in the network replace 3x3 kernels with 5x5 kernels. The h-swish activation function speeds the network up while maintaining accuracy.
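For reference, h-swish replaces the sigmoid in swish with the piecewise-linear hard sigmoid, so it can be computed with a single ReLU6. A minimal sketch (PyTorch already ships this as nn.Hardswish / F.hardswish, which the implementation below uses directly):

import torch
from torch.nn import functional as F

def h_swish(x):
    # h-swish(x) = x * ReLU6(x + 3) / 6
    return x * F.relu6(x + 3.0) / 6.0

x = torch.randn(4)
print(torch.allclose(h_swish(x), F.hardswish(x)))  # True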
Based on the MobileNetV3-Large network structure, the implementation is as follows:
from typing import Callable, List, Optional
import torch
from torch import nn, Tensor
from torch.nn import functional as F
from functools import partial
def _make_divisible(in_channels, divisor=8, min_channel=None):
    # round the channel count to a multiple of divisor
    if min_channel is None:
        min_channel = divisor
    # round to the nearest multiple
    new_channel = max(min_channel, int(in_channels + divisor / 2) // divisor * divisor)
    # make sure rounding down does not reduce the channels by more than 10%
    if new_channel < 0.9 * in_channels:
        new_channel += divisor
    return new_channel
class ConvBNAct(nn.Sequential):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, group=1, act=None):
        padding = (kernel_size - 1) // 2
        norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01)
        if act is None:
            act = nn.ReLU6
        super(ConvBNAct, self).__init__(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
                      stride=stride, padding=padding, groups=group, bias=False),
            norm_layer(out_channels),
            act(inplace=True)
        )
class SE_Attention(nn.Module):
    def __init__(self, in_channels, squeeze_factor=4):
        super(SE_Attention, self).__init__()
        # two fc layers (implemented as 1x1 convs): the first squeezes to in_channels / 4,
        # the second expands back to in_channels
        squeeze_hidden = _make_divisible(in_channels // squeeze_factor, 8)
        self.fc1 = nn.Conv2d(in_channels, squeeze_hidden, 1)
        self.fc2 = nn.Conv2d(squeeze_hidden, in_channels, 1)

    def forward(self, x):
        scale = F.adaptive_avg_pool2d(x, output_size=(1, 1))
        scale = self.fc1(scale)
        scale = F.relu(scale, inplace=True)
        scale = self.fc2(scale)
        scale = F.hardsigmoid(scale, inplace=True)
        return scale * x
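# In short: global average pooling squeezes each channel to a single value, two 1x1 convs form the
# excitation bottleneck (channels/4, then back), and the hard-sigmoid output rescales x channel-wise.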
class InvertedResidualConfig:
    def __init__(self, in_channels, kernel_size, expand_channels, output_channels,
                 use_SE, act, stride, channels_factor_V2alpha):
        self.in_channels = self.adjust_channels(in_channels, channels_factor_V2alpha)
        self.kernel = kernel_size
        # channels after the 1x1 expansion conv (every bneck except the first expands)
        self.expand_channels = self.adjust_channels(expand_channels, channels_factor_V2alpha)
        self.output_channels = self.adjust_channels(output_channels, channels_factor_V2alpha)
        self.use_SE = use_SE
        self.use_HardSWISH = act == 'HardSWISH'
        self.stride = stride

    @staticmethod
    def adjust_channels(channels, channels_factor_V2alpha):
        return _make_divisible(channels * channels_factor_V2alpha, 8)
class InvertedResidual(nn.Module):
    def __init__(self, config: InvertedResidualConfig):
        super(InvertedResidual, self).__init__()
        self.use_residual_connect = (config.stride == 1 and config.in_channels == config.output_channels)
        layers = []
        activation = nn.Hardswish if config.use_HardSWISH else nn.ReLU
        # 1x1 expansion conv (skipped when expand_channels == in_channels, i.e. the first bneck)
        if config.expand_channels != config.in_channels:
            layers.append(ConvBNAct(
                config.in_channels,
                config.expand_channels,
                kernel_size=1,
                act=activation
            ))
        # depthwise conv
        layers.append(ConvBNAct(
            config.expand_channels,
            config.expand_channels,
            kernel_size=config.kernel,
            stride=config.stride,
            group=config.expand_channels,
            act=activation
        ))
        if config.use_SE:
            layers.append(SE_Attention(config.expand_channels))
        # 1x1 projection conv (linear, no activation)
        layers.append(ConvBNAct(config.expand_channels,
                                config.output_channels,
                                kernel_size=1,
                                act=nn.Identity))
        self.block = nn.Sequential(*layers)
        self.out_channels = config.output_channels

    def forward(self, x):
        result = self.block(x)
        if self.use_residual_connect:
            result += x
        return result
class MobilenetV3(nn.Module):
    def __init__(self,
                 inverted_residual_setting: List[InvertedResidualConfig],
                 last_channels,
                 num_classes):
        super(MobilenetV3, self).__init__()
        block = InvertedResidual
        # norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01)
        layers = []
        firstconv_output_channel = inverted_residual_setting[0].in_channels
        layers.append(ConvBNAct(3, firstconv_output_channel, kernel_size=3, stride=2, act=nn.Hardswish))
        # build the inverted residual blocks
        for config in inverted_residual_setting:
            layers.append(block(config))
        # build the last conv layer
        lastconv_input_channels = inverted_residual_setting[-1].output_channels
        lastconv_output_channels = 6 * lastconv_input_channels
        layers.append(ConvBNAct(lastconv_input_channels, lastconv_output_channels, kernel_size=1, act=nn.Hardswish))
        self.features = nn.Sequential(*layers)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        # classification head
        self.classifier = nn.Sequential(
            nn.Linear(lastconv_output_channels, last_channels),
            nn.Hardswish(inplace=True),
            nn.Dropout(p=0.2, inplace=True),
            nn.Linear(last_channels, num_classes)
        )
        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
def mobilenet_large(num_classes=1000):
    channels_factor_V2alpha = 1.0
    bneck_config = partial(InvertedResidualConfig, channels_factor_V2alpha=channels_factor_V2alpha)
    adjust_channels = partial(InvertedResidualConfig.adjust_channels, channels_factor_V2alpha=channels_factor_V2alpha)
    inverted_residual_setting = [
        # input_c, kernel, expanded_c, out_c, use_se, activation, stride
        bneck_config(16, 3, 16, 16, False, "RE", 1),
        bneck_config(16, 3, 64, 24, False, "RE", 2),  # C1
        bneck_config(24, 3, 72, 24, False, "RE", 1),
        bneck_config(24, 5, 72, 40, True, "RE", 2),  # C2
        bneck_config(40, 5, 120, 40, True, "RE", 1),
        bneck_config(40, 5, 120, 40, True, "RE", 1),
        bneck_config(40, 3, 240, 80, False, "HardSWISH", 2),  # C3
        bneck_config(80, 3, 200, 80, False, "HardSWISH", 1),
        bneck_config(80, 3, 184, 80, False, "HardSWISH", 1),
        bneck_config(80, 3, 184, 80, False, "HardSWISH", 1),
        bneck_config(80, 3, 480, 112, True, "HardSWISH", 1),
        bneck_config(112, 3, 672, 112, True, "HardSWISH", 1),
        bneck_config(112, 5, 672, 160, True, "HardSWISH", 2),  # C4
        bneck_config(160, 5, 960, 160, True, "HardSWISH", 1),
        bneck_config(160, 5, 960, 160, True, "HardSWISH", 1),
    ]
    last_channels = adjust_channels(1280)
    return MobilenetV3(inverted_residual_setting=inverted_residual_setting,
                       last_channels=last_channels,
                       num_classes=num_classes)
if __name__ == '__main__':
    a = torch.randn(1, 3, 224, 224)
    net = mobilenet_large(num_classes=5)
    out = net(a)
    print(out.shape)
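The same config machinery also covers MobileNetV3-Small. A sketch of that variant, using the bneck settings from the MobileNetV3 paper (the mobilenet_small function below is an added illustration, not part of the original implementation above):

def mobilenet_small(num_classes=1000):
    channels_factor_V2alpha = 1.0
    bneck_config = partial(InvertedResidualConfig, channels_factor_V2alpha=channels_factor_V2alpha)
    adjust_channels = partial(InvertedResidualConfig.adjust_channels, channels_factor_V2alpha=channels_factor_V2alpha)
    inverted_residual_setting = [
        # input_c, kernel, expanded_c, out_c, use_se, activation, stride
        bneck_config(16, 3, 16, 16, True, "RE", 2),
        bneck_config(16, 3, 72, 24, False, "RE", 2),
        bneck_config(24, 3, 88, 24, False, "RE", 1),
        bneck_config(24, 5, 96, 40, True, "HardSWISH", 2),
        bneck_config(40, 5, 240, 40, True, "HardSWISH", 1),
        bneck_config(40, 5, 240, 40, True, "HardSWISH", 1),
        bneck_config(40, 5, 120, 48, True, "HardSWISH", 1),
        bneck_config(48, 5, 144, 48, True, "HardSWISH", 1),
        bneck_config(48, 5, 288, 96, True, "HardSWISH", 2),
        bneck_config(96, 5, 576, 96, True, "HardSWISH", 1),
        bneck_config(96, 5, 576, 96, True, "HardSWISH", 1),
    ]
    last_channels = adjust_channels(1024)
    return MobilenetV3(inverted_residual_setting=inverted_residual_setting,
                       last_channels=last_channels,
                       num_classes=num_classes)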