This article uses MobileNetV1 and MobileNetV2 as the backbones for semantic segmentation.
MobileNetV1
Note: dw denotes a depthwise separable convolution, s denotes the stride, and Avg Pool denotes average pooling.
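For reference, a depthwise separable convolution splits an ordinary convolution into a per-channel 3x3 depthwise convolution (groups=in_channels) followed by a 1x1 pointwise convolution, which is where most of the parameter savings come from. A minimal sketch (illustrative only, not part of the original code):

import torch
import torch.nn as nn

# Sketch of a depthwise separable convolution:
# 3x3 depthwise conv (one filter per input channel) + 1x1 pointwise conv.
dw = nn.Conv2d(32, 32, kernel_size=3, padding=1, groups=32)   # depthwise
pw = nn.Conv2d(32, 64, kernel_size=1)                          # pointwise
std = nn.Conv2d(32, 64, kernel_size=3, padding=1)              # standard conv, for comparison

x = torch.randn(1, 32, 56, 56)
print(pw(dw(x)).shape)                                          # torch.Size([1, 64, 56, 56])

# Weight count (ignoring biases): 32*3*3 + 32*64 = 2336 vs 32*64*3*3 = 18432
print(sum(p.numel() for p in [*dw.parameters(), *pw.parameters()] if p.dim() > 1))
print(sum(p.numel() for p in std.parameters() if p.dim() > 1))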
The full implementation is as follows:
import torch
import torch.nn as nn
import torchvision.transforms as Transforms
import torch.nn.functional as F
import math
INPUT_SIZE = 224
class _conv_block(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, kernel_size=(3, 3), padding=1, groups=1):
        super(_conv_block, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
                      stride=stride, padding=padding, groups=groups),
            nn.BatchNorm2d(num_features=out_channels),
        )

    def forward(self, x):
        return F.relu6(self.features(x))


class _depthwise_conv_block(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, kernel_size=(3, 3), padding=1):
        super(_depthwise_conv_block, self).__init__()
        # 3x3 depthwise convolution (groups=in_channels); the stride is applied here
        self.conv1 = _conv_block(in_channels=in_channels, out_channels=in_channels, kernel_size=(3, 3),
                                 padding=1, stride=stride, groups=in_channels)
        # 1x1 pointwise convolution to change the number of channels
        self.conv2 = _conv_block(in_channels=in_channels, out_channels=out_channels, kernel_size=(1, 1), padding=0)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        return x
class get_mobilenet_encoder(nn.Module):
    def __init__(self):
        super(get_mobilenet_encoder, self).__init__()
        self._conv1 = _conv_block(in_channels=3, out_channels=32, stride=2)
        self._depthwise_conv_block1 = _depthwise_conv_block(in_channels=32, out_channels=64)
        # f1_channels = 64
        self._depthwise_conv_block2 = _depthwise_conv_block(in_channels=64, out_channels=128, stride=2)
        self._depthwise_conv_block3 = _depthwise_conv_block(in_channels=128, out_channels=128)
        # f2_channels = 128
        self._depthwise_conv_block4 = _depthwise_conv_block(in_channels=128, out_channels=256, stride=2)
        self._depthwise_conv_block5 = _depthwise_conv_block(in_channels=256, out_channels=256)
        # f3_channels = 256
        self._depthwise_conv_block6 = _depthwise_conv_block(in_channels=256, out_channels=512, stride=2)
        self._depthwise_conv_block7 = _depthwise_conv_block(in_channels=512, out_channels=512)
        self._depthwise_conv_block8 = _depthwise_conv_block(in_channels=512, out_channels=512)
        self._depthwise_conv_block9 = _depthwise_conv_block(in_channels=512, out_channels=512)
        self._depthwise_conv_block10 = _depthwise_conv_block(in_channels=512, out_channels=512)
        self._depthwise_conv_block11 = _depthwise_conv_block(in_channels=512, out_channels=512)
        # f4_channels = 512
        self._depthwise_conv_block12 = _depthwise_conv_block(in_channels=512, out_channels=1024, stride=2)
        self._depthwise_conv_block13 = _depthwise_conv_block(in_channels=1024, out_channels=1024)
        # f5_channels = 1024
    def forward(self, x):
        x = self._conv1(x)
        x = self._depthwise_conv_block1(x)
        f1 = x.clone()
        print(f1.size())
        x = self._depthwise_conv_block2(x)
        x = self._depthwise_conv_block3(x)
        f2 = x.clone()
        print(f2.size())
        x = self._depthwise_conv_block4(x)
        x = self._depthwise_conv_block5(x)
        f3 = x.clone()
        print(f3.size())
        x = self._depthwise_conv_block6(x)
        x = self._depthwise_conv_block7(x)
        x = self._depthwise_conv_block8(x)
        x = self._depthwise_conv_block9(x)
        x = self._depthwise_conv_block10(x)
        x = self._depthwise_conv_block11(x)
        f4 = x.clone()
        print(f4.size())
        x = self._depthwise_conv_block12(x)
        x = self._depthwise_conv_block13(x)
        f5 = x.clone()
        print(f5.size())
        # Return the five intermediate feature maps for the decoder to pick from
        return [f1, f2, f3, f4, f5]
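A quick sanity check of the encoder (hypothetical snippet; assumes a 224x224 input). With the strides above, the five feature maps come out at 1/2, 1/4, 1/8, 1/16 and 1/32 of the input resolution:

# Hypothetical usage sketch: feed a dummy 224x224 image through the encoder.
# Expected sizes: f1 [1, 64, 112, 112], f2 [1, 128, 56, 56], f3 [1, 256, 28, 28],
#                 f4 [1, 512, 14, 14], f5 [1, 1024, 7, 7]
encoder = get_mobilenet_encoder()
dummy = torch.randn(1, 3, INPUT_SIZE, INPUT_SIZE)
f1, f2, f3, f4, f5 = encoder(dummy)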
Building SegNet with MobileNetV1 as the backbone
As you can see, the overall structure is essentially a VGG-like encoder followed by a reversed (mirrored) VGG as the decoder.
class segnet_decoder(nn.Module):
    def __init__(self, num_classes, f):
        super(segnet_decoder, self).__init__()
        self.num_classes = num_classes
        # f is only used here to read the channel count and spatial size of the chosen feature map
        n_up = INPUT_SIZE // f.size()[3]
        n_up = int(math.log2(n_up))
        assert n_up >= 2
        self.in_c = f.size()[1]
        self.features = []
        # Encoder feature sizes for a 224x224 input:
        # f1 [1, 64, 112, 112]
        # f2 [1, 128, 56, 56]
        # f3 [1, 256, 28, 28]
        # f4 [1, 512, 14, 14]
        # f5 [1, 1024, 7, 7]
        # f4 is usually used
        self.features.append(nn.Conv2d(in_channels=self.in_c, out_channels=self.in_c // 2, kernel_size=(3, 3), padding=1))
        self.in_c = self.in_c // 2
        self.features.append(nn.BatchNorm2d(num_features=self.in_c))
        # First upsampling: for f4 this goes [1, 256, 14, 14] -> [1, 256, 28, 28]
        self.features.append(nn.ConvTranspose2d(in_channels=self.in_c, out_channels=self.in_c, stride=2, padding=1, kernel_size=(3, 3), output_padding=1))
        self.features.append(nn.BatchNorm2d(num_features=self.in_c))
        for _ in range(n_up - 2):
            self.features.append(nn.ConvTranspose2d(in_channels=self.in_c, out_channels=self.in_c, kernel_size=(3, 3), stride=2, padding=1, output_padding=1))
            self.features.append(nn.Conv2d(in_channels=self.in_c, out_channels=self.in_c // 2, kernel_size=(3, 3), padding=1))
            self.in_c = self.in_c // 2
            self.features.append(nn.BatchNorm2d(num_features=self.in_c))
        self.features.append(nn.ConvTranspose2d(in_channels=self.in_c, out_channels=self.in_c, kernel_size=(3, 3), stride=2, padding=1, output_padding=1))
        self.features.append(nn.Conv2d(in_channels=self.in_c, out_channels=self.num_classes, kernel_size=(3, 3), padding=1))
        self.features.append(nn.BatchNorm2d(num_features=self.num_classes))
        self.features = nn.Sequential(*(self.features))

    def forward(self, x):
        return self.features(x)
class mobilenet_segnet(nn.Module):
    def __init__(self, num_classes):
        super(mobilenet_segnet, self).__init__()
        self.encoder = get_mobilenet_encoder()
        # Build the decoder from the shape of f4 ([1, 512, 14, 14] for a 224x224 input);
        # the zero tensor is only used so the decoder can read the channel count and size.
        self.decoder = segnet_decoder(num_classes, torch.zeros(1, 512, INPUT_SIZE // 16, INPUT_SIZE // 16))

    def forward(self, x):
        f1, f2, f3, f4, f5 = self.encoder(x)
        x = self.decoder(f4)
        # Per-pixel class probabilities: softmax over the channel (class) dimension
        return F.softmax(x, dim=1)
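A hypothetical end-to-end check of the SegNet model (shapes only, random weights):

# Hypothetical usage sketch: run the full MobileNetV1-SegNet on a dummy image.
model = mobilenet_segnet(num_classes=3)
out = model(torch.randn(1, 3, INPUT_SIZE, INPUT_SIZE))
print(out.size())   # expected: torch.Size([1, 3, 224, 224])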
The output produced by the network is shown below.
Note: random weights were used (i.e., no pretrained weights were loaded into the model).
The MobileNetV2 network model
Compared with V1, the only real change is a small modification of the bottleneck around the depthwise convolution.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as Transforms
import PIL.Image as Image
'''
The input size is 224 x 224 x 3
'''
INPUT_SIZE = 224
NUM_CLASSES = 3
class _bottlenet(nn.Module):
    def __init__(self, in_channels, out_channels, expand_ratio=1, stride=1):
        super(_bottlenet, self).__init__()
        # The residual shortcut is only used when the spatial size and channel count are unchanged
        self.use_res_connect = (stride == 1) and (in_channels == out_channels)
        hidden_channels = in_channels * expand_ratio
        self.features = nn.Sequential(
            # 1x1 expansion convolution
            nn.Conv2d(in_channels=in_channels, out_channels=hidden_channels, kernel_size=1),
            nn.BatchNorm2d(num_features=hidden_channels),
            nn.ReLU6(inplace=True),
            # 3x3 depthwise convolution (groups=hidden_channels); the stride is applied here
            nn.Conv2d(in_channels=hidden_channels, out_channels=hidden_channels, kernel_size=3,
                      stride=stride, padding=1, groups=hidden_channels),
            nn.BatchNorm2d(num_features=hidden_channels),
            nn.ReLU6(inplace=True),
            # 1x1 linear projection (no activation: the "linear bottleneck")
            nn.Conv2d(in_channels=hidden_channels, out_channels=out_channels, kernel_size=1),
            nn.BatchNorm2d(num_features=out_channels),
        )

    def forward(self, x):
        out = self.features(x)
        if self.use_res_connect:
            out = out + x
        return out
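A small sanity check of the block (hypothetical snippet): the residual shortcut is only used when stride=1 and the input and output channel counts match:

# Hypothetical sanity check for the inverted-residual block.
blk_res = _bottlenet(in_channels=32, out_channels=32, expand_ratio=6, stride=1)   # uses the shortcut
blk_down = _bottlenet(in_channels=32, out_channels=64, expand_ratio=6, stride=2)  # no shortcut, downsamples
x = torch.randn(1, 32, 56, 56)
print(blk_res(x).size())   # torch.Size([1, 32, 56, 56])
print(blk_down(x).size())  # torch.Size([1, 64, 28, 28])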
class get_mobilenetv2_encoder(nn.Module):
    def __init__(self, num_classes=3):
        super(get_mobilenetv2_encoder, self).__init__()
        self.features = []
        self.features.append(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=(3, 3), padding=1, stride=2)
        )
        self.features.append(
            nn.BatchNorm2d(num_features=32)
        )
        self.features.append(
            nn.ReLU6(inplace=True)
        )
        # ------ 3 ------
        self.features.append(
            _bottlenet(in_channels=32, out_channels=16, expand_ratio=1, stride=1)
        )
        # [t, c, n, s] = [6, 24, 2, 2]
        self.features.append(
            _bottlenet(in_channels=16, out_channels=24, expand_ratio=6, stride=2)
        )
        self.features.append(
            _bottlenet(in_channels=24, out_channels=24, expand_ratio=6, stride=1)
        )
        # ------ 6 ------
        # [t, c, n, s] = [6, 32, 3, 2]
        self.features.append(
            _bottlenet(in_channels=24, out_channels=32, expand_ratio=6, stride=2)
        )
        for i in range(2):
            self.features.append(
                _bottlenet(in_channels=32, out_channels=32, expand_ratio=6)
            )
        # ------ 9 ------
        # [t, c, n, s] = [6, 64, 4, 2]
        self.features.append(
            _bottlenet(in_channels=32, out_channels=64, expand_ratio=6, stride=2)
        )
        for i in range(3):
            self.features.append(
                _bottlenet(in_channels=64, out_channels=64, expand_ratio=6)
            )
        # ------ 13 ------
        # [t, c, n, s] = [6, 96, 3, 1]
        self.features.append(
            _bottlenet(in_channels=64, out_channels=96, expand_ratio=6)
        )
        for i in range(2):
            self.features.append(
                _bottlenet(in_channels=96, out_channels=96, expand_ratio=6)
            )
        # [t, c, n, s] = [6, 160, 3, 2]
        self.features.append(
            _bottlenet(in_channels=96, out_channels=160, expand_ratio=6, stride=2)
        )
        for i in range(2):
            self.features.append(
                _bottlenet(in_channels=160, out_channels=160, expand_ratio=6)
            )
        # [t, c, n, s] = [6, 320, 1, 1]
        self.features.append(
            _bottlenet(in_channels=160, out_channels=320, expand_ratio=6)
        )
        # ------ 19 ------
        # (320, 7, 7) -> (1280, 7, 7)
        self.features.append(
            nn.Conv2d(in_channels=320, out_channels=1280, kernel_size=1, stride=1)
        )
        self.features.append(
            nn.BatchNorm2d(num_features=1280)
        )
        self.features.append(
            nn.ReLU6(inplace=True)
        )
        self.features.append(
            nn.AvgPool2d(kernel_size=(7, 7))
        )
        self.features.append(
            nn.Conv2d(in_channels=1280, out_channels=num_classes, kernel_size=1)
        )
        self.features = nn.Sequential(*self.features)
    def forward(self, x):
        features = []
        # Collect intermediate feature maps at these indices of self.features
        extract_at = [0, 3, 6, 9, 13]
        for i, op in enumerate(self.features):
            x = op(x)
            if i in extract_at:
                features.append(x.clone())
        return features
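A quick shape check of the encoder (hypothetical snippet; the shapes follow from the strides and the collection indices above):

# Hypothetical sanity check: run the MobileNetV2 encoder on a dummy 224x224 image.
encoder_v2 = get_mobilenetv2_encoder()
f1, f2, f3, f4, f5 = encoder_v2(torch.randn(1, 3, INPUT_SIZE, INPUT_SIZE))
for f in (f1, f2, f3, f4, f5):
    print(f.size())
# With the indices [0, 3, 6, 9, 13] this gives roughly:
# [1, 32, 112, 112], [1, 16, 112, 112], [1, 32, 28, 28], [1, 64, 14, 14], [1, 96, 14, 14]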
'''
In PSPNet the feature map is generally not downsampled 5 times; three or four downsamplings
are the usual choices, and four downsamplings are used here.
'''
class pool_block(nn.Module):
    def __init__(self, f, stride):
        super(pool_block, self).__init__()
        in_channels = f.size()[1]
        kernel_size = stride
        self.features = nn.Sequential(
            nn.AvgPool2d(kernel_size=kernel_size, stride=kernel_size, padding=kernel_size // 2),
            nn.Conv2d(in_channels=in_channels, out_channels=512, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(num_features=512),
            nn.ReLU6(inplace=True),
            nn.Upsample(size=(INPUT_SIZE, INPUT_SIZE), mode="bilinear")
        )

    def forward(self, x):
        x = self.features(x)
        return x
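Each pool_block pools the chosen feature map at one scale, projects it to 512 channels, and upsamples it back to the input resolution so that the branches can later be concatenated. A hypothetical example:

# Hypothetical sketch: apply one pyramid pooling branch to a dummy f4 feature map.
f4 = torch.randn(1, 64, 14, 14)        # shape of f4 from the MobileNetV2 encoder above
branch = pool_block(f4, stride=6)      # pool factor 6
print(branch(f4).size())               # torch.Size([1, 512, 224, 224])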
Building the PSPNet network model
class _pspnet(nn.Module):
    def __init__(self, inputs):
        super(_pspnet, self).__init__()
        pool_factors = [1, 2, 3, 6]
        self.encoder = get_mobilenetv2_encoder()
        # inputs is only used once here to infer the shape of f4 when building the pooling branches
        [f1, f2, f3, f4, f5] = self.encoder(inputs)
        self.features = []
        for k_s in pool_factors:
            self.features.append(pool_block(f4, k_s))
        self.features = nn.Sequential(*(self.features))
        # 1x1 branch applied directly to f4
        self.conv_x = nn.Sequential(
            nn.Conv2d(in_channels=f4.size()[1], out_channels=512, kernel_size=1),
            nn.BatchNorm2d(num_features=512),
            nn.ReLU6(inplace=True),
            nn.Upsample(size=(INPUT_SIZE, INPUT_SIZE), mode="bilinear")
        )
        # Fuse the 5 concatenated branches (4 pooling branches + the 1x1 branch) into class scores
        self.Conv = nn.Sequential(
            nn.Conv2d(in_channels=512 * 5, out_channels=NUM_CLASSES, kernel_size=1),
            nn.BatchNorm2d(num_features=NUM_CLASSES),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        [f1, f2, f3, f4, f5] = self.encoder(x)
        features = [self.conv_x(f4)]
        for block in self.features:
            features.append(block(f4))
        feature_map = torch.cat(features, dim=1)
        feature_map = self.Conv(feature_map)
        # Per-pixel class probabilities: softmax over the channel (class) dimension
        return F.softmax(feature_map, dim=1)
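A hypothetical end-to-end check (shapes only, random weights):

# Hypothetical usage sketch: build and run the MobileNetV2-PSPNet on a dummy image.
dummy = torch.randn(1, 3, INPUT_SIZE, INPUT_SIZE)
model = _pspnet(dummy)                 # the dummy input is only used to infer feature shapes
out = model(dummy)
print(out.size())                      # expected: torch.Size([1, 3, 224, 224])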
UNet needs no introduction; everyone is already familiar with it.
These posts are mostly my own notes; for many of the finer details of these networks, please refer to the corresponding papers.