一.resnet系列backbone
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
# Single alias for the normalisation layer instantiated by the blocks in this file.
BatchNorm2d = nn.BatchNorm2d

# Public names exported by a star-import of this module.
__all__ = [
    'ResNet',
    'resnet18',
    'resnet34',
    'resnet50',
    'resnet101',
    'deformable_resnet18',
    'deformable_resnet50',
    'resnet152',
]

# Checkpoint download locations, keyed by architecture name.
model_urls = dict(
    resnet18='https://download.pytorch.org/models/resnet18-5c106cde.pth',
    resnet34='https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    resnet50='https://download.pytorch.org/models/resnet50-19c8e357.pth',
    resnet101='https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    resnet152='https://download.pytorch.org/models/resnet152-b121ed2d.pth',
)
def constant_init(module, constant, bias=0):
    """Fill a module's weight, and its bias when one exists, with constants.

    Args:
        module: Module exposing a ``weight`` tensor and, optionally, a ``bias``.
        constant: Value written into every element of ``module.weight``.
        bias: Value written into every element of ``module.bias``.
    """
    nn.init.constant_(module.weight, constant)
    # Layers created with ``bias=False`` still have a ``bias`` attribute, but it
    # is None, so an attribute-existence check alone is not enough.
    if getattr(module, 'bias', None) is not None:
        nn.init.constant_(module.bias, bias)
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with padding 1 and the given stride."""
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions.

    When ``dcn`` is given, the second convolution is a torchvision
    ``DeformConv2d`` whose offsets come from an extra regular convolution.

    Args:
        inplanes: Number of input channels.
        planes: Number of channels produced by both convolutions.
        stride: Stride of the first convolution.
        downsample: Optional module applied to the input to form the shortcut.
        dcn: Optional dict; its ``deformable_groups`` entry (default 1) scales
            the number of offset channels.
    """

    # Ratio of the block's output channels to ``planes``.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, dcn=None):
        super().__init__()
        use_dcn = dcn is not None
        self.with_dcn = use_dcn
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.with_modulated_dcn = False
        if use_dcn:
            # Imported lazily so torchvision is only needed for deformable blocks.
            from torchvision.ops import DeformConv2d
            groups = dcn.get('deformable_groups', 1)
            # 18 offset channels per group; presumably 2 coordinates for each
            # of the 9 taps of the 3x3 kernel -- see the DeformConv2d docs.
            per_group_offsets = 18
            self.conv2_offset = nn.Conv2d(planes, groups * per_group_offsets, kernel_size=3, padding=1)
            self.conv2 = DeformConv2d(planes, planes, kernel_size=3, padding=1, bias=False)
        else:
            self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1, bias=False)
        self.bn2 = BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run conv-bn-relu, conv-bn, add the shortcut and apply the final ReLU."""
        shortcut = x
        out = self.relu(self.bn1(self.conv1(x)))
        if self.with_dcn:
            # The deformable conv needs the offsets predicted from the same features.
            out = self.conv2(out, self.conv2_offset(out))
        else:
            out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample is not None:
            shortcut = self.downsample(x)
        out += shortcut
        return self.relu(out)
class Bottleneck(nn.Module):
    """Residual block built as 1x1 -> 3x3 -> 1x1 convolutions.

    The last 1x1 convolution outputs ``planes * 4`` channels. When ``dcn`` is
    given, the 3x3 convolution is a torchvision ``DeformConv2d`` driven by
    offsets from an extra regular convolution.

    Args:
        inplanes: Number of input channels.
        planes: Channel width of the first two convolutions.
        stride: Stride of the 3x3 convolution (and of the offset convolution).
        downsample: Optional module applied to the input to form the shortcut.
        dcn: Optional dict; its ``deformable_groups`` entry (default 1) scales
            the number of offset channels.
    """

    # Ratio of the block's output channels to ``planes``.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, dcn=None):
        super().__init__()
        use_dcn = dcn is not None
        widened = planes * 4
        self.with_dcn = use_dcn
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = BatchNorm2d(planes)
        self.with_modulated_dcn = False
        if use_dcn:
            groups = dcn.get('deformable_groups', 1)
            # Imported lazily so torchvision is only needed for deformable blocks.
            from torchvision.ops import DeformConv2d
            # 18 offset channels per group; presumably 2 coordinates for each
            # of the 9 taps of the 3x3 kernel -- see the DeformConv2d docs.
            per_group_offsets = 18
            self.conv2_offset = nn.Conv2d(planes, groups * per_group_offsets, stride=stride, kernel_size=3, padding=1)
            self.conv2 = DeformConv2d(planes, planes, kernel_size=3, padding=1, stride=stride, bias=False)
        else:
            self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = BatchNorm2d(widened)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
        self.dcn = dcn

    def forward(self, x):
        """Run the three conv-bn stages, add the shortcut and apply the final ReLU."""
        shortcut = x
        out = self.relu(self.bn1(self.conv1(x)))
        if self.with_dcn:
            # The deformable conv needs the offsets predicted from the same features.
            out = self.conv2(out, self.conv2_offset(out))
        else:
            out = self.conv2(out)
        out = self.relu(self.bn2(out))
        out = self.bn3(self.conv3(out))
        if self.downsample is not None:
            shortcut = self.downsample(x)
        out += shortcut
        return self.relu(out)
class ResNet(nn.Module):
    """ResNet feature extractor returning the outputs of its four stages.

    There is no pooling/classification head: ``forward`` returns the feature
    maps of ``layer1`` .. ``layer4``.

    Args:
        block: Residual block class (``BasicBlock`` or ``Bottleneck``); must
            expose an ``expansion`` attribute and accept a ``dcn`` keyword.
        layers: Sequence of four ints, the number of blocks in each stage.
        in_channels: Number of channels of the input image.
        dcn: Optional deformable-conv config dict, forwarded to the blocks of
            ``layer2``, ``layer3`` and ``layer4`` (``layer1`` never uses it).

    Attributes:
        out_channels: Output channel count of each stage, in stage order.
    """

    def __init__(self, block, layers, in_channels=3, dcn=None):
        # nn.Module has to be initialised before attributes are assigned on it.
        super(ResNet, self).__init__()
        self.dcn = dcn
        self.inplanes = 64  # running input width, updated by _make_layer
        self.out_channels = []
        self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dcn=dcn)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dcn=dcn)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dcn=dcn)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Normal init with std sqrt(2 / (kernel area * out_channels)).
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
        if self.dcn is not None:
            # Zero the offset predictors (weight and bias) of deformable blocks.
            for m in self.modules():
                if isinstance(m, (Bottleneck, BasicBlock)) and hasattr(m, 'conv2_offset'):
                    constant_init(m.conv2_offset, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dcn=None):
        """Build one stage of ``blocks`` residual blocks.

        Only the first block receives ``stride`` and, when the shape changes,
        a 1x1 conv + batch-norm projection shortcut. Updates ``self.inplanes``
        and appends the stage's output width to ``self.out_channels``.
        """
        out_planes = planes * block.expansion
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                BatchNorm2d(out_planes),
            )

        layers = [block(self.inplanes, planes, stride, downsample, dcn=dcn)]
        self.inplanes = out_planes
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, dcn=dcn))
        self.out_channels.append(out_planes)

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the tuple ``(layer1, layer2, layer3, layer4)`` of feature maps."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x2 = self.layer1(x)
        x3 = self.layer2(x2)
        x4 = self.layer3(x3)
        x5 = self.layer4(x4)

        return x2, x3, x4, x5
def resnet18(pretrained=True, **kwargs):
    """Construct a ResNet-18 backbone.

    Args:
        pretrained (bool): If True, load the ImageNet checkpoint listed in
            ``model_urls`` with ``strict=False``.
        **kwargs: Forwarded to ``ResNet`` (for example ``in_channels``).

    Returns:
        The constructed ``ResNet`` instance.

    Raises:
        AssertionError: If ``pretrained`` is True and ``in_channels`` is not 3.
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        # ``in_channels`` is optional (ResNet defaults it to 3), so read it
        # with a default rather than indexing, which raised KeyError.
        assert kwargs.get('in_channels', 3) == 3, 'in_channels must be 3 whem pretrained is True'
        print('load from imagenet')
        model.load_state_dict(model_zoo.load_url(model_urls['resnet18']), strict=False)
    return model
def deformable_resnet18(pretrained=True, **kwargs):
    """Construct a ResNet-18 backbone with deformable convolutions.

    The model is built with ``dcn=dict(deformable_groups=1)``.

    Args:
        pretrained (bool): If True, load the ImageNet ResNet-18 checkpoint
            listed in ``model_urls`` with ``strict=False``.
        **kwargs: Forwarded to ``ResNet`` (for example ``in_channels``).

    Returns:
        The constructed ``ResNet`` instance.

    Raises:
        AssertionError: If ``pretrained`` is True and ``in_channels`` is not 3.
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], dcn=dict(deformable_groups=1), **kwargs)
    if pretrained:
        # ``in_channels`` is optional (ResNet defaults it to 3), so read it
        # with a default rather than indexing, which raised KeyError.
        assert kwargs.get('in_channels', 3) == 3, 'in_channels must be 3 whem pretrained is True'
        print('load from imagenet')
        model.load_state_dict(model_zoo.load_url(model_urls['resnet18']), strict=False)
    return model
def resnet34(pretrained=True, **kwargs):
    """Construct a ResNet-34 backbone.

    Args:
        pretrained (bool): If True, load the ImageNet checkpoint listed in
            ``model_urls`` with ``strict=False``.
        **kwargs: Forwarded to ``ResNet`` (for example ``in_channels``).

    Returns:
        The constructed ``ResNet`` instance.

    Raises:
        AssertionError: If ``pretrained`` is True and ``in_channels`` is not 3.
    """
    model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
    if pretrained:
        # ``in_channels`` is optional (ResNet defaults it to 3), so read it
        # with a default rather than indexing, which raised KeyError.
        assert kwargs.get('in_channels', 3) == 3, 'in_channels must be 3 whem pretrained is True'
        model.load_state_dict(model_zoo.load_url(model_urls['resnet34']), strict=False)
    return model
def resnet50(pretrained=True, **kwargs):
    """Construct a ResNet-50 backbone.

    Args:
        pretrained (bool): If True, load the ImageNet checkpoint listed in
            ``model_urls`` with ``strict=False``.
        **kwargs: Forwarded to ``ResNet`` (for example ``in_channels``).

    Returns:
        The constructed ``ResNet`` instance.

    Raises:
        AssertionError: If ``pretrained`` is True and ``in_channels`` is not 3.
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if pretrained:
        # ``in_channels`` is optional (ResNet defaults it to 3), so read it
        # with a default rather than indexing, which raised KeyError.
        assert kwargs.get('in_channels', 3) == 3, 'in_channels must be 3 whem pretrained is True'
        model.load_state_dict(model_zoo.load_url(model_urls['resnet50']), strict=False)
    return model
def deformable_resnet50(pretrained=True, **kwargs):
    """Construct a ResNet-50 backbone with deformable convolutions.

    The model is built with ``dcn=dict(deformable_groups=1)``.

    Args:
        pretrained (bool): If True, load the ImageNet ResNet-50 checkpoint
            listed in ``model_urls`` with ``strict=False``.
        **kwargs: Forwarded to ``ResNet`` (for example ``in_channels``).

    Returns:
        The constructed ``ResNet`` instance.

    Raises:
        AssertionError: If ``pretrained`` is True and ``in_channels`` is not 3.
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3], dcn=dict(deformable_groups=1), **kwargs)
    if pretrained:
        # ``in_channels`` is optional (ResNet defaults it to 3), so read it
        # with a default rather than indexing, which raised KeyError.
        assert kwargs.get('in_channels', 3) == 3, 'in_channels must be 3 whem pretrained is True'
        model.load_state_dict(model_zoo.load_url(model_urls['resnet50']), strict=False)
    return model
def resnet101(pretrained=True, **kwargs):
    """Construct a ResNet-101 backbone.

    Args:
        pretrained (bool): If True, load the ImageNet checkpoint listed in
            ``model_urls`` with ``strict=False``.
        **kwargs: Forwarded to ``ResNet`` (for example ``in_channels``).

    Returns:
        The constructed ``ResNet`` instance.

    Raises:
        AssertionError: If ``pretrained`` is True and ``in_channels`` is not 3.
    """
    model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
    if pretrained:
        # ``in_channels`` is optional (ResNet defaults it to 3), so read it
        # with a default rather than indexing, which raised KeyError.
        assert kwargs.get('in_channels', 3) == 3, 'in_channels must be 3 whem pretrained is True'
        model.load_state_dict(model_zoo.load_url(model_urls['resnet101']), strict=False)
    return model
def resnet152(pretrained=True, **kwargs):
    """Construct a ResNet-152 backbone.

    Args:
        pretrained (bool): If True, load the ImageNet checkpoint listed in
            ``model_urls`` with ``strict=False``.
        **kwargs: Forwarded to ``ResNet`` (for example ``in_channels``).

    Returns:
        The constructed ``ResNet`` instance.

    Raises:
        AssertionError: If ``pretrained`` is True and ``in_channels`` is not 3.
    """
    model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
    if pretrained:
        # ``in_channels`` is optional (ResNet defaults it to 3), so read it
        # with a default rather than indexing, which raised KeyError.
        assert kwargs.get('in_channels', 3) == 3, 'in_channels must be 3 whem pretrained is True'
        model.load_state_dict(model_zoo.load_url(model_urls['resnet152']), strict=False)
    return model
if __name__ == '__main__':
    import torch

    # Smoke test: feed an all-zero batch through an untrained ResNet-101 and
    # print the shape of each returned feature map.
    dummy_batch = torch.zeros(2, 3, 640, 640)
    backbone = resnet101(pretrained=False)
    for feature_map in backbone(dummy_batch):
        print(feature_map.shape)
二.mobilenet v2
2.1基础知识
1.采用inverted residual(倒残差)结构。与resnet的bottleneck(先用1x1卷积降维->3x3卷积提特征->1x1卷积升维)相反,这里先用1x1卷积把通道变宽->3x3卷积提特征->再用1x1卷积把通道变窄。因为经过1x1卷积扩大通道数以后,可以提升抽取特征的能力,如图1所示。
2.降维后的最后一个1x1卷积不接Relu,而使用Linear(线性激活)代替:降维已经使特征丢失了一部分,如果再接Relu还会进一步丢失信息,如图2所示。
图1 inverted residual
图2宽窄通道relu丢失信息对比
上图是嵌入高维空间的低维流形经过ReLU变换的示例。在这些例子中,先用随机矩阵T把第一个图中的螺旋嵌入到n维空间并施加ReLU,然后使用T的逆矩阵投影回2D空间。当n = 2、3时,信息损失很多,恢复后的张量坍缩严重;而当n = 15到30时,不会丢失太多的输入信息。也就是说,当通道数较多时,如果输入流形可以嵌入到激活空间中一个维度显著更低的子空间,则ReLU变换能够保留信息;当通道数较少时,通道中的信息很可能被丢弃。
综上所述,在通道数较少的层后,应该用线性激活代替ReLU。MobileNet V2的Linear bottleneck Inverted residual block中,降维后的1X1卷积层后接的是一个线性激活,其他情况用的是ReLU。
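v1使用深度可分离卷积来减少计算量:其计算量与标准卷积计算量之比为 $\frac{k^2 + d_j}{k^2 \cdot d_j} = \frac{1}{d_j} + \frac{1}{k^2}$(其中 $k$ 为卷积核大小,$d_j$ 为输出通道数)。如果 $k=3$,也就是3x3卷积核,计算量约为标准卷积的1/9,即能够减少8~9倍左右的计算量。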
2.2 代码实现
1.BottleNeck实现
import torch.nn as nn
import torch
class BottleNeck(nn.Module):
    """Inverted-residual block: 1x1 expand -> 3x3 depthwise -> 1x1 linear projection.

    Args:
        inchannles: Number of input channels.
        outchannels: Number of output channels.
        expansion: Multiplier applied to ``inchannles`` for the hidden width.
        stride: Stride of the 3x3 depthwise convolution.
        downsample: Optional module applied to the input to form the shortcut.
            If None, the input itself is added, so it must already match the
            output shape.
    """

    def __init__(self, inchannles, outchannels, expansion=1, stride=1, downsample=None):
        super(BottleNeck, self).__init__()
        hidden = inchannles * expansion
        # 1x1 pointwise convolution that widens the channels.
        self.conv1 = nn.Conv2d(inchannles, hidden, kernel_size=1)
        self.bn1 = nn.BatchNorm2d(hidden)
        # 3x3 depthwise convolution: groups equals the channel count.
        self.conv2 = nn.Conv2d(hidden, hidden, kernel_size=3, padding=1, stride=stride, groups=hidden)
        self.bn2 = nn.BatchNorm2d(hidden)
        # 1x1 pointwise convolution that projects back down to ``outchannels``.
        self.conv3 = nn.Conv2d(hidden, outchannels, kernel_size=1)
        self.bn3 = nn.BatchNorm2d(outchannels)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Return the projected features plus the shortcut, with no final activation."""
        shortcut = x

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        if self.downsample is not None:
            shortcut = self.downsample(x)

        out += shortcut
        # Linear bottleneck: the narrow output is deliberately NOT passed
        # through ReLU, which would discard information in low-dimensional
        # features.
        return out
def BottleNeck_test():
    """Smoke-test ``BottleNeck``: print the module and its output shape for a strided block."""
    stride = 2
    in_ch, out_ch = 3, 3
    # Projection shortcut so the residual matches the strided output.
    shortcut = nn.Sequential(
        nn.Conv2d(in_ch, out_ch, kernel_size=1, stride=stride),
        nn.BatchNorm2d(out_ch),
    )
    block = BottleNeck(in_ch, out_ch, expansion=2, downsample=shortcut, stride=stride)
    print('bottleneck:', block)

    batch = torch.rand((8, 3, 224, 224))
    result = block(batch)
    print('out.shape', result.shape)


# Run the smoke test when executed as a script.
if __name__ == '__main__':
    BottleNeck_test()
2.整体代码,加了权重初始化
import torch.nn as nn
import torch
class BottleNeck(nn.Module):
    """Inverted-residual block: 1x1 expand -> 3x3 depthwise -> 1x1 linear projection.

    Args:
        inchannles: Number of input channels.
        outchannels: Number of output channels.
        expansion: Multiplier applied to ``inchannles`` for the hidden width.
        stride: Stride of the 3x3 depthwise convolution.
        downsample: Optional module applied to the input to form the shortcut.
            If None, the input itself is added, so it must already match the
            output shape.
    """

    def __init__(self, inchannles, outchannels, expansion=1, stride=1, downsample=None):
        super(BottleNeck, self).__init__()
        hidden = inchannles * expansion
        # 1x1 pointwise convolution that widens the channels.
        self.conv1 = nn.Conv2d(inchannles, hidden, kernel_size=1)
        self.bn1 = nn.BatchNorm2d(hidden)
        # 3x3 depthwise convolution: groups equals the channel count.
        self.conv2 = nn.Conv2d(hidden, hidden, kernel_size=3, padding=1, stride=stride, groups=hidden)
        self.bn2 = nn.BatchNorm2d(hidden)
        # 1x1 pointwise convolution that projects back down to ``outchannels``.
        self.conv3 = nn.Conv2d(hidden, outchannels, kernel_size=1)
        self.bn3 = nn.BatchNorm2d(outchannels)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Return the projected features plus the shortcut, with no final activation."""
        shortcut = x

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        if self.downsample is not None:
            shortcut = self.downsample(x)

        out += shortcut
        # Linear bottleneck: the narrow output is deliberately NOT passed
        # through ReLU, which would discard information in low-dimensional
        # features.
        return out
class MobileNetV2(nn.Module):
    """MobileNetV2-style classifier built from ``BottleNeck`` stages.

    Args:
        n: Sequence of seven ints, the number of blocks in each of the seven
            stages.
        numclasses: Number of output classes.
        verbose: If True (the default, matching the previous behaviour),
            ``forward`` prints the tensor shape after each stage.
    """

    def __init__(self, n, numclasses=1000, verbose=True):
        super(MobileNetV2, self).__init__()
        self.verbose = verbose
        self.inchannels = 32  # running input width, updated by make_layer
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(n[0], outchannels=16, stride=1, expansion=1)
        self.layer2 = self.make_layer(n[1], outchannels=24, stride=2, expansion=6)
        self.layer3 = self.make_layer(n[2], outchannels=32, stride=2, expansion=6)
        self.layer4 = self.make_layer(n[3], outchannels=64, stride=2, expansion=6)
        self.layer5 = self.make_layer(n[4], outchannels=96, stride=1, expansion=6)
        self.layer6 = self.make_layer(n[5], outchannels=160, stride=2, expansion=6)
        self.layer7 = self.make_layer(n[6], outchannels=320, stride=1, expansion=1)
        self.conv8 = nn.Conv2d(320, 1280, kernel_size=1, stride=1)
        # Global average pooling. A fixed 7x7 window only reduced to 1x1 for
        # 224x224 inputs; the adaptive form does so for any input size.
        self.avegpool = nn.AdaptiveAvgPool2d(1)
        self.conv9 = nn.Conv2d(1280, numclasses, kernel_size=1, stride=1)

    def make_layer(self, blocks_num, outchannels, stride, expansion):
        """Build one stage of ``blocks_num`` ``BottleNeck`` blocks.

        The first block applies ``stride`` and uses a 1x1 conv + batch-norm
        projection as its shortcut; the remaining blocks use stride 1 and the
        identity shortcut. Updates ``self.inchannels`` to ``outchannels``.
        """
        downsample_ = nn.Sequential(
            nn.Conv2d(self.inchannels, outchannels, kernel_size=1, stride=stride),
            nn.BatchNorm2d(outchannels)
        )
        # First block: projection shortcut, since the shape may change.
        layers = [BottleNeck(self.inchannels, outchannels, expansion=expansion,
                             stride=stride, downsample=downsample_)]
        # Remaining blocks keep the shape, so their shortcut is the identity.
        self.inchannels = outchannels
        for _ in range(1, blocks_num):
            layers.append(BottleNeck(self.inchannels, outchannels, expansion=expansion, stride=1))
        return nn.Sequential(*layers)

    def _report(self, label, tensor):
        """Print ``label`` and the tensor's shape when ``verbose`` is enabled."""
        if self.verbose:
            print(label, tensor.shape)

    def forward(self, x):
        """Return class scores flattened to ``(batch, -1)``."""
        x = self.conv1(x)
        self._report('conv1.shape:', x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.layer1(x)
        self._report('layer1.shape:', x)
        x = self.layer2(x)
        self._report('layer2.shape:', x)
        x = self.layer3(x)
        self._report('layer3.shape:', x)
        x = self.layer4(x)
        self._report('layer4.shape:', x)
        x = self.layer5(x)
        self._report('layer5.shape:', x)
        x = self.layer6(x)
        self._report('layer6.shape:', x)
        x = self.layer7(x)
        self._report('layer7.shape:', x)
        x = self.conv8(x)
        self._report('conv8.shape:', x)
        x = self.avegpool(x)
        self._report('avegpool:', x)
        x = self.conv9(x)
        self._report('conv9.shape:', x)
        x = x.view(x.size(0), -1)
        return x
def BottleNeck_test():
    """Smoke-test ``BottleNeck``: print the module and its output shape for a strided block."""
    stride = 2
    in_ch, out_ch = 3, 3
    # Projection shortcut so the residual matches the strided output.
    shortcut = nn.Sequential(
        nn.Conv2d(in_ch, out_ch, kernel_size=1, stride=stride),
        nn.BatchNorm2d(out_ch),
    )
    block = BottleNeck(in_ch, out_ch, expansion=2, downsample=shortcut, stride=stride)
    print('bottleneck:', block)

    batch = torch.rand((8, 3, 224, 224))
    result = block(batch)
    print('out.shape', result.shape)
def weigth_init(m):
    """Initialise one module in place; suitable for ``model.apply(weigth_init)``.

    * ``nn.Conv2d``: Xavier-uniform weight, bias filled with 0.1.
    * ``nn.BatchNorm2d``: weight filled with 1, bias with 0.
    * ``nn.Linear``: weight drawn from a normal with mean 0 and std 0.01,
      bias filled with 0.

    Any other module type is left untouched. Parameters that are absent
    (``bias=False`` or ``affine=False``) are skipped instead of raising.

    Args:
        m: The module to initialise.
    """
    if isinstance(m, nn.Conv2d):
        nn.init.xavier_uniform_(m.weight.data)
        if m.bias is not None:
            nn.init.constant_(m.bias.data, 0.1)
    elif isinstance(m, nn.BatchNorm2d):
        # With affine=False both weight and bias are None.
        if m.weight is not None:
            m.weight.data.fill_(1)
        if m.bias is not None:
            m.bias.data.zero_()
    elif isinstance(m, nn.Linear):
        m.weight.data.normal_(0, 0.01)
        if m.bias is not None:
            m.bias.data.zero_()
def MobileNetV2_test():
    """Smoke-test ``MobileNetV2``: build a 10-class model, re-initialise it and print the output shape."""
    stage_repeats = [1, 2, 3, 4, 3, 3, 1]
    net = MobileNetV2(n=stage_repeats, numclasses=10)
    net.apply(weigth_init)

    batch = torch.rand((8, 3, 224, 224))
    scores = net(batch)
    print('out.shape', scores.shape)


# Run the smoke test when executed as a script.
if __name__ == '__main__':
    MobileNetV2_test()
1.0
==img.shape: torch.Size([1, 3, 256, 192])
==x.shape: torch.Size([1, 32, 128, 96])
==i, x.shape==: 0 torch.Size([1, 16, 128, 96])
==i, x.shape==: 1 torch.Size([1, 24, 64, 48])
==i, x.shape==: 2 torch.Size([1, 32, 32, 24])
==i, x.shape==: 3 torch.Size([1, 64, 16, 12])
==i, x.shape==: 4 torch.Size([1, 96, 16, 12])
==i, x.shape==: 5 torch.Size([1, 160, 8, 6])
==i, x.shape==: 6 torch.Size([1, 320, 8, 6])
==i, x.shape==: 7 torch.Size([1, 1280, 8, 6])
0.5
==img.shape: torch.Size([1, 3, 256, 192])
==x.shape: torch.Size([1, 16, 128, 96])
==i, x.shape==: 0 torch.Size([1, 8, 128, 96])
==i, x.shape==: 1 torch.Size([1, 16, 64, 48])
==i, x.shape==: 2 torch.Size([1, 16, 32, 24])
==i, x.shape==: 3 torch.Size([1, 32, 16, 12])
==i, x.shape==: 4 torch.Size([1, 48, 16, 12])
==i, x.shape==: 5 torch.Size([1, 80, 8, 6])
==i, x.shape==: 6 torch.Size([1, 160, 8, 6])
==i, x.shape==: 7 torch.Size([1, 640, 8, 6])
参考: