This article uses MobileNetV1 and MobileNetV2 as the backbones for semantic segmentation.
MobileNetV1
Note: dw denotes a depthwise separable convolution, s denotes the stride, and Avg Pool denotes average pooling.
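For reference, a depthwise separable convolution splits an ordinary convolution into a per-channel 3x3 depthwise convolution (groups=in_channels) followed by a 1x1 pointwise convolution, which is where most of the parameter savings come from. A minimal sketch (illustrative only, not part of the original code):

import torch
import torch.nn as nn

# Sketch of a depthwise separable convolution:
# 3x3 depthwise conv (one filter per input channel) + 1x1 pointwise conv.
dw = nn.Conv2d(32, 32, kernel_size=3, padding=1, groups=32)   # depthwise
pw = nn.Conv2d(32, 64, kernel_size=1)                          # pointwise
std = nn.Conv2d(32, 64, kernel_size=3, padding=1)              # standard conv, for comparison

x = torch.randn(1, 32, 56, 56)
print(pw(dw(x)).shape)                                          # torch.Size([1, 64, 56, 56])

# Weight count (ignoring biases): 32*3*3 + 32*64 = 2336 vs 32*64*3*3 = 18432
print(sum(p.numel() for p in [*dw.parameters(), *pw.parameters()] if p.dim() > 1))
print(sum(p.numel() for p in std.parameters() if p.dim() > 1))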
The full implementation is as follows:
import torch
import torch.nn as nn
import torchvision.transforms as Transforms
import torch.nn.functional as F
import math
INPUT_SIZE = 224
class _conv_block(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, kernel_size=(3, 3), padding=1, groups=1):
        super(_conv_block, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
                      stride=stride, padding=padding, groups=groups),
            nn.BatchNorm2d(num_features=out_channels),
        )

    def forward(self, x):
        return F.relu6(self.features(x))


class _depthwise_conv_block(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, kernel_size=(3, 3), padding=1):
        super(_depthwise_conv_block, self).__init__()
        # 3x3 depthwise convolution (groups=in_channels); the stride is applied here
        self.conv1 = _conv_block(in_channels=in_channels, out_channels=in_channels, kernel_size=(3, 3),
                                 padding=1, stride=stride, groups=in_channels)
        # 1x1 pointwise convolution to change the number of channels
        self.conv2 = _conv_block(in_channels=in_channels, out_channels=out_channels, kernel_size=(1, 1), padding=0)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        return x
class get_mobilenet_encoder(nn.Module):
    def __init__(self):
        super(get_mobilenet_encoder, self).__init__()
        self._conv1 = _conv_block(in_channels=3, out_channels=32, stride=2)
        self._depthwise_conv_block1 = _depthwise_conv_block(in_channels=32, out_channels=64)
        # f1_channels = 64
        self._depthwise_conv_block2 = _depthwise_conv_block(in_channels=64, out_channels=128, stride=2)
        self._depthwise_conv_block3 = _depthwise_conv_block(in_channels=128, out_channels=128)
        # f2_channels = 128
        self._depthwise_conv_block4 = _depthwise_conv_block(in_channels=128, out_channels=256, stride=2)
        self._depthwise_conv_block5 = _depthwise_conv_block(in_channels=256, out_channels=256)
        # f3_channels = 256
        self._depthwise_conv_block6 = _depthwise_conv_block(in_channels=256, out_channels=512, stride=2)
        self._depthwise_conv_block7 = _depthwise_conv_block(in_channels=512, out_channels=512)
        self._depthwise_conv_block8 = _depthwise_conv_block(in_channels=512, out_channels=512)
        self._depthwise_conv_block9 = _depthwise_conv_block(in_channels=512, out_channels=512)
        self._depthwise_conv_block10 = _depthwise_conv_block(in_channels=512, out_channels=512)
        self._depthwise_conv_block11 = _depthwise_conv_block(in_channels=512, out_channels=512)
        # f4_channels = 512
        self._depthwise_conv_block12 = _depthwise_conv_block(in_channels=512, out_channels=1024, stride=2)
        self._depthwise_conv_block13 = _depthwise_conv_block(in_channels=1024, out_channels=1024)
        # f5_channels = 1024
    def forward(self, x):
        x = self._conv1(x)
        x = self._depthwise_conv_block1(x)
        f1 = x.clone()
        print(f1.size())
        x = self._depthwise_conv_block2(x)
        x = self._depthwise_conv_block3(x)
        f2 = x.clone()
        print(f2.size())
        x = self._depthwise_conv_block4(x)
        x = self._depthwise_conv_block5(x)
        f3 = x.clone()
        print(f3.size())
        x = self._depthwise_conv_block6(x)
        x = self._depthwise_conv_block7(x)
        x = self._depthwise_conv_block8(x)
        x = self._depthwise_conv_block9(x)
        x = self._depthwise_conv_block10(x)
        x = self._depthwise_conv_block11(x)
        f4 = x.clone()
        print(f4.size())
        x = self._depthwise_conv_block12(x)
        x = self._depthwise_conv_block13(x)
        f5 = x.clone()
        print(f5.size())
        # Return the five intermediate feature maps for the decoder to pick from
        return [f1, f2, f3, f4, f5]
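A quick sanity check of the encoder (hypothetical snippet; assumes a 224x224 input). With the strides above, the five feature maps come out at 1/2, 1/4, 1/8, 1/16 and 1/32 of the input resolution:

# Hypothetical usage sketch: feed a dummy 224x224 image through the encoder.
# Expected sizes: f1 [1, 64, 112, 112], f2 [1, 128, 56, 56], f3 [1, 256, 28, 28],
#                 f4 [1, 512, 14, 14], f5 [1, 1024, 7, 7]
encoder = get_mobilenet_encoder()
dummy = torch.randn(1, 3, INPUT_SIZE, INPUT_SIZE)
f1, f2, f3, f4, f5 = encoder(dummy)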
Building SegNet with MobileNetV1 as the backbone
As you can see, the overall structure is essentially a VGG-like encoder followed by a reversed (mirrored) VGG as the decoder.
class segnet_decoder(nn.Module):
    def __init__(self, num_classes, f):
        super(segnet_decoder, self).__init__()
        self.num_classes = num_classes
        # f is only used here to read the channel count and spatial size of the chosen feature map
        n_up = INPUT_SIZE // f.size()[3]
        n_up = int(math.log2(n_up))
        assert n_up >= 2
        self.in_c = f.size()[1]
        self.features = []
        # Encoder feature sizes for a 224x224 input:
        # f1 [1, 64, 112, 112]
        # f2 [1, 128, 56, 56]
        # f3 [1, 256, 28, 28]
        # f4 [1, 512, 14, 14]
        # f5 [1, 1024, 7, 7]
        # f4 is usually used
        self.features.append(nn.Conv2d(in_channels=self.in_c, out_channels=self.in_c // 2, kernel_size=(3, 3), padding=1))
        self.in_c = self.in_c // 2
        self.features.append(nn.BatchNorm2d(num_features=self.in_c))
        # First upsampling: for f4 this goes [1, 256, 14, 14] -> [1, 256, 28, 28]
        self.features.append(nn.ConvTranspose2d(in_channels=self.in_c, out_channels=self.in_c, stride=2, padding=1, kernel_size=(3, 3), output_padding=1))
        self.features.append(nn.BatchNorm2d(num_features=self.in_c))
        for _ in range(n_up - 2):
            self.features.append(nn.ConvTranspose2d(in_channels=self.in_c, out_channels=self.in_c, kernel_size=(3, 3), stride=2, padding=1, output_padding=1))
            self.features.append(nn.Conv2d(in_channels=self.in_c, out_channels=self.in_c // 2, kernel_size=(3, 3), padding=1))
            self.in_c = self.in_c // 2
            self.features.append(nn.BatchNorm2d(num_features=self.in_c))
        self.features.append(nn.ConvTranspose2d(in_channels=self.in_c, out_channels=self.in_c, kernel_size=(3, 3), stride=2, padding=1, output_padding=1))
        self.features.append(nn.Conv2d(in_channels=self.in_c, out_channels=self.num_classes, kernel_size=(3, 3), padding=1))
        self.features.append(nn.BatchNorm2d(num_features=self.num_classes))
        self.features = nn.Sequential(*(self.features))

    def forward(self, x):
        return self.features(x)
class mobilenet_segnet(nn.Module):
    def __init__(self, num_classes):
        super(mobilenet_segnet, self).__init__()
        self.encoder = get_mobilenet_encoder()
        # Build the decoder from the shape of f4 ([1, 512, 14, 14] for a 224x224 input);
        # the zero tensor is only used so the decoder can read the channel count and size.
        self.decoder = segnet_decoder(num_classes, torch.zeros(1, 512, INPUT_SIZE // 16, INPUT_SIZE // 16))

    def forward(self, x):
        f1, f2, f3, f4, f5 = self.encoder(x)
        x = self.decoder(f4)
        # Per-pixel class probabilities: softmax over the channel (class) dimension
        return F.softmax(x, dim=1)
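A hypothetical end-to-end check of the SegNet model (shapes only, random weights):

# Hypothetical usage sketch: run the full MobileNetV1-SegNet on a dummy image.
model = mobilenet_segnet(num_classes=3)
out = model(torch.randn(1, 3, INPUT_SIZE, INPUT_SIZE))
print(out.size())   # expected: torch.Size([1, 3, 224, 224])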
The output produced by the network is shown below.
Note: random weights were used (i.e., no pretrained weights were loaded into the model).
The MobileNetV2 network model
Compared with V1, the only real change is a small modification of the bottleneck around the depthwise convolution.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as Transforms
import PIL.Image as Image
'''
The input size is 224 x 224 x 3
'''
INPUT_SIZE = 224
NUM_CLASSES = 3
class _bottlenet(nn.Module):
    def __init__(self, in_channels, out_channels, expand_ratio=1, stride=1):
        super(_bottlenet, self).__init__()
        # The residual shortcut is only used when the spatial size and channel count are unchanged
        self.use_res_connect = (stride == 1) and (in_channels == out_channels)
        hidden_channels = in_channels * expand_ratio
        self.features = nn.Sequential(
            # 1x1 expansion convolution
            nn.Conv2d(in_channels=in_channels, out_channels=hidden_channels, kernel_size=1),
            nn.BatchNorm2d(num_features=hidden_channels),
            nn.ReLU6(inplace=True),
            # 3x3 depthwise convolution (groups=hidden_channels); the stride is applied here
            nn.Conv2d(in_channels=hidden_channels, out_channels=hidden_channels, kernel_size=3,
                      stride=stride, padding=1, groups=hidden_channels),
            nn.BatchNorm2d(num_features=hidden_channels),
            nn.ReLU6(inplace=True),
            # 1x1 linear projection (no activation: the "linear bottleneck")
            nn.Conv2d(in_channels=hidden_channels, out_channels=out_channels, kernel_size=1),
            nn.BatchNorm2d(num_features=out_channels),
        )

    def forward(self, x):
        out = self.features(x)
        if self.use_res_connect:
            out = out + x
        return out
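A small sanity check of the block (hypothetical snippet): the residual shortcut is only used when stride=1 and the input and output channel counts match:

# Hypothetical sanity check for the inverted-residual block.
blk_res = _bottlenet(in_channels=32, out_channels=32, expand_ratio=6, stride=1)   # uses the shortcut
blk_down = _bottlenet(in_channels=32, out_channels=64, expand_ratio=6, stride=2)  # no shortcut, downsamples
x = torch.randn(1, 32, 56, 56)
print(blk_res(x).size())   # torch.Size([1, 32, 56, 56])
print(blk_down(x).size())  # torch.Size([1, 64, 28, 28])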
class get_mobilenetv2_encoder(nn.Module):
    def __init__(self, num_classes=3):
        super(get_mobilenetv2_encoder, self).__init__()
        self.features = []
        self.features.append(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=(3, 3), padding=1, stride=2)
        )
        self.features.append(
            nn.BatchNorm2d(num_features=32)
        )
        self.features.append(
            nn.ReLU6(inplace=True)
        )
        # ------ 3 ------
        self.features.append(
            _bottlenet(in_channels=32, out_channels=16, expand_ratio=1, stride=1)
        )
        # [t, c, n, s] = [6, 24, 2, 2]
        self.features.append(
            _bottlenet(in_channels=16, out_channels=24, expand_ratio=6, stride=2)
        )
        self.features.append(
            _bottlenet(in_channels=24, out_channels=24, expand_ratio=6, stride=1)
        )
        # ------ 6 ------
        # [t, c, n, s] = [6, 32, 3, 2]
        self.features.append(
            _bottlenet(in_channels=24, out_channels=32, expand_ratio=6, stride=2)
        )
        for i in range(2):
            self.features.append(
                _bottlenet(in_channels=32, out_channels=32, expand_ratio=6)
            )
        # ------ 9 ------
        # [t, c, n, s] = [6, 64, 4, 2]
        self.features.append(
            _bottlenet(in_channels=32, out_channels=64, expand_ratio=6, stride=2)
        )
        for i in range(3):
            self.features.append(
                _bottlenet(in_channels=64, out_channels=64, expand_ratio=6)
            )
        # ------ 13 ------
        # [t, c, n, s] = [6, 96, 3, 1]
        self.features.append(
            _bottlenet(in_channels=64, out_channels=96, expand_ratio=6)
        )
        for i in range(2):
            self.features.append(
                _bottlenet(in_channels=96, out_channels=96, expand_ratio=6)
            )
        # [t, c, n, s] = [6, 160, 3, 2]
        self.features.append(
            _bottlenet(in_channels=96, out_channels=160, expand_ratio=6, stride=2)
        )
        for i in range(2):
            self.features.append(
                _bottlenet(in_channels=160, out_channels=160, expand_ratio=6)
            )
        # [t, c, n, s] = [6, 320, 1, 1]
        self.features.append(
            _bottlenet(in_channels=160, out_channels=320, expand_ratio=6)
        )
        # ------ 19 ------
        # (320, 7, 7) -> (1280, 7, 7)
        self.features.append(
            nn.Conv2d(in_channels=320, out_channels=1280, kernel_size=1, stride=1)
        )
        self.features.append(
            nn.BatchNorm2d(num_features=1280)
        )
        self.features.append(
            nn.ReLU6(inplace=True)
        )
        self.features.append(
            nn.AvgPool2d(kernel_size=(7, 7))
        )
        self.features.append(
            nn.Conv2d(in_channels=1280, out_channels=num_classes, kernel_size=1)
        )
        self.features = nn.Sequential(*self.features)
    def forward(self, x):
        features = []
        # Collect intermediate feature maps at these indices of self.features
        extract_at = [0, 3, 6, 9, 13]
        for i, op in enumerate(self.features):
            x = op(x)
            if i in extract_at:
                features.append(x.clone())
        return features
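A quick shape check of the encoder (hypothetical snippet; the shapes follow from the strides and the collection indices above):

# Hypothetical sanity check: run the MobileNetV2 encoder on a dummy 224x224 image.
encoder_v2 = get_mobilenetv2_encoder()
f1, f2, f3, f4, f5 = encoder_v2(torch.randn(1, 3, INPUT_SIZE, INPUT_SIZE))
for f in (f1, f2, f3, f4, f5):
    print(f.size())
# With the indices [0, 3, 6, 9, 13] this gives roughly:
# [1, 32, 112, 112], [1, 16, 112, 112], [1, 32, 28, 28], [1, 64, 14, 14], [1, 96, 14, 14]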
'''
In PSPNet the feature map is generally not downsampled 5 times; three or four downsamplings
are the usual choices, and four downsamplings are used here.
'''
class pool_block(nn.Module):
    def __init__(self, f, stride):
        super(pool_block, self).__init__()
        in_channels = f.size()[1]
        kernel_size = stride
        self.features = nn.Sequential(
            nn.AvgPool2d(kernel_size=kernel_size, stride=kernel_size, padding=kernel_size // 2),
            nn.Conv2d(in_channels=in_channels, out_channels=512, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(num_features=512),
            nn.ReLU6(inplace=True),
            nn.Upsample(size=(INPUT_SIZE, INPUT_SIZE), mode="bilinear")
        )

    def forward(self, x):
        x = self.features(x)
        return x
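Each pool_block pools the chosen feature map at one scale, projects it to 512 channels, and upsamples it back to the input resolution so that the branches can later be concatenated. A hypothetical example:

# Hypothetical sketch: apply one pyramid pooling branch to a dummy f4 feature map.
f4 = torch.randn(1, 64, 14, 14)        # shape of f4 from the MobileNetV2 encoder above
branch = pool_block(f4, stride=6)      # pool factor 6
print(branch(f4).size())               # torch.Size([1, 512, 224, 224])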
Building the PSPNet network model
class _pspnet(nn.Module):
    def __init__(self, inputs):
        super(_pspnet, self).__init__()
        pool_factors = [1, 2, 3, 6]
        self.encoder = get_mobilenetv2_encoder()
        # inputs is only used once here to infer the shape of f4 when building the pooling branches
        [f1, f2, f3, f4, f5] = self.encoder(inputs)
        self.features = []
        for k_s in pool_factors:
            self.features.append(pool_block(f4, k_s))
        self.features = nn.Sequential(*(self.features))
        # 1x1 branch applied directly to f4
        self.conv_x = nn.Sequential(
            nn.Conv2d(in_channels=f4.size()[1], out_channels=512, kernel_size=1),
            nn.BatchNorm2d(num_features=512),
            nn.ReLU6(inplace=True),
            nn.Upsample(size=(INPUT_SIZE, INPUT_SIZE), mode="bilinear")
        )
        # Fuse the 5 concatenated branches (4 pooling branches + the 1x1 branch) into class scores
        self.Conv = nn.Sequential(
            nn.Conv2d(in_channels=512 * 5, out_channels=NUM_CLASSES, kernel_size=1),
            nn.BatchNorm2d(num_features=NUM_CLASSES),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        [f1, f2, f3, f4, f5] = self.encoder(x)
        features = [self.conv_x(f4)]
        for block in self.features:
            features.append(block(f4))
        feature_map = torch.cat(features, dim=1)
        feature_map = self.Conv(feature_map)
        # Per-pixel class probabilities: softmax over the channel (class) dimension
        return F.softmax(feature_map, dim=1)
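A hypothetical end-to-end check (shapes only, random weights):

# Hypothetical usage sketch: build and run the MobileNetV2-PSPNet on a dummy image.
dummy = torch.randn(1, 3, INPUT_SIZE, INPUT_SIZE)
model = _pspnet(dummy)                 # the dummy input is only used to infer feature shapes
out = model(dummy)
print(out.size())                      # expected: torch.Size([1, 3, 224, 224])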
UNet needs no introduction; everyone is already familiar with it.
These posts are mostly my own notes; for many of the finer details of these networks, please refer to the corresponding papers.