参数重组在语义分割中的应用

最新推荐文章于 2023-08-24 09:17:29 发布

蓝鲸鱼BlueWhale

最新推荐文章于 2023-08-24 09:17:29 发布

阅读量637

点赞数 1

分类专栏：计算机视觉 语义分割 网络参数重组　文章标签：深度学习 pytorch 神经网络

本文链接：https://blog.csdn.net/weixin_44579633/article/details/121118470

版权

计算机视觉同时被 3 个专栏收录

56 篇文章 24 订阅

订阅专栏

语义分割

8 篇文章 4 订阅

订阅专栏

网络参数重组

7 篇文章 2 订阅

订阅专栏

参数重组是一种最近比较火的设计网络的思想，即利用网络的重参数化，把多层合成一层，进行网络加速。

在我之前写的一些参数重组的博客中（链接），介绍了RepVGG、ACNet、DBB等为分类网络设计的替代卷积核。由此容易想到：如果使用类似的结构，并将原本的普通卷积替换为空洞卷积，是否就能将参数重组应用于语义分割呢？

于是，我用各个替代卷积核对语义分割经典网络中的卷积核进行了替换，完整代码附在文末。以PSPNet为例，替换卷积后的RepPSP如下：

import math
import torch
import torch.nn.functional as F
from torch import nn
from .backbone import resnet
from .backbone import densenet
from base import BaseModel
from utils.helpers import initialize_weights, set_trainable
from itertools import chain
import numpy as np
from .repconvs import RepConv_dict


class _PSPModule(nn.Module):
    """Pyramid pooling module with a ``RepConv`` 3x3 fusion convolution.

    The input is average-pooled to each size in ``bin_sizes``, projected by a
    1x1 convolution, resized back to the input resolution and concatenated
    with the input itself; a ``RepConv`` bottleneck then fuses the stack down
    to ``in_channels // len(bin_sizes)`` channels.

    ``RepConv`` is looked up as a module-level global when this module is
    constructed. In this file that global is bound inside ``RepPSP.__init__``,
    so this class cannot be instantiated before a ``RepPSP`` has started
    constructing.

    Args:
        in_channels: Channel count of the incoming feature map.
        bin_sizes: Output sizes of the adaptive average pools; one pooling
            stage is created per entry.
        norm_layer: Callable that takes a channel count and returns a
            normalisation layer (``RepPSP`` passes ``nn.BatchNorm2d``).
        deploy: Forwarded unchanged to ``RepConv``. Presumably selects the
            fused, inference-time form of the block -- confirm against the
            RepConv implementations.
    """

    def __init__(self, in_channels, bin_sizes, norm_layer, deploy=False):
        # nn.Module has to be initialised before attributes are assigned on
        # the instance; assigning first only works for plain Python values.
        super(_PSPModule, self).__init__()
        self.deploy = deploy

        out_channels = in_channels // len(bin_sizes)
        self.stages = nn.ModuleList([
            self._make_stages(in_channels, out_channels, b_s, norm_layer)
            for b_s in bin_sizes
        ])
        # The bottleneck sees the raw input plus one pooled branch per bin size.
        fused_channels = in_channels + (out_channels * len(bin_sizes))
        self.bottleneck = nn.Sequential(
            RepConv(fused_channels, out_channels,
                    kernel_size=3, padding=1, bias=False, deploy=self.deploy),
            norm_layer(out_channels),
            nn.ReLU(inplace=True),
            nn.Dropout2d(0.1)
        )

    def _make_stages(self, in_channels, out_channels, bin_sz, norm_layer):
        """Build one pooling branch: adaptive avg-pool -> 1x1 conv -> norm -> ReLU."""
        prior = nn.AdaptiveAvgPool2d(output_size=bin_sz)
        conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        bn = norm_layer(out_channels)
        relu = nn.ReLU(inplace=True)
        return nn.Sequential(prior, conv, bn, relu)

    def forward(self, features):
        """Fuse ``features`` with its pooled pyramids.

        Args:
            features: Tensor whose dimensions 2 and 3 are used as height and
                width.

        Returns:
            Output of the bottleneck applied to the channel-wise
            concatenation of the input and every resized pooling branch.
        """
        h, w = features.size()[2], features.size()[3]
        pyramids = [features]
        pyramids.extend([
            F.interpolate(stage(features), size=(h, w), mode='bilinear',
                          align_corners=True)
            for stage in self.stages
        ])
        output = self.bottleneck(torch.cat(pyramids, dim=1))
        return output


class RepPSP(BaseModel):
    """PSPNet on a ResNet backbone whose 3x3 head convolutions are ``RepConv`` blocks.

    Args:
        num_classes: Number of output channels of the final 1x1 classifiers.
        deploy: Forwarded to every ``RepConv`` that is built. Presumably
            selects the fused, inference-time form -- confirm against the
            RepConv implementations. Defaults to ``False``, matching
            ``_PSPModule``.
        repconv: Key into ``RepConv_dict`` choosing the replacement block.
            Effectively required: the default ``None`` is not a valid key.
        in_channels: Number of input image channels. When it is not 3, the
            first stem layer is replaced by a new 7x7 stride-2 convolution.
        backbone: Name of a constructor in the ``resnet`` backbone module.
        pretrained: Passed positionally to the backbone constructor.
        use_aux: When true, ``forward`` in training mode also returns an
            auxiliary prediction computed from the ``layer3`` features.
        freeze_bn: When true, ``freeze_bn()`` is called at the end of
            construction.
        freeze_backbone: When true, ``set_trainable(..., False)`` is applied
            to the stem and the four residual stages.

    Raises:
        KeyError: If ``repconv`` is not a key of ``RepConv_dict``.
    """

    def __init__(self, num_classes, deploy=False, repconv=None, in_channels=3, backbone='resnet152',
                 pretrained=True, use_aux=True, freeze_bn=False, freeze_backbone=False):
        # _PSPModule resolves `RepConv` as a module-level global, so the
        # selected class must be published there before the head is built.
        global RepConv
        super(RepPSP, self).__init__()
        if repconv not in RepConv_dict:
            raise KeyError('unknown repconv {!r}; expected one of {}'.format(
                repconv, sorted(RepConv_dict)))
        RepConv = RepConv_dict[repconv]

        norm_layer = nn.BatchNorm2d
        model = getattr(resnet, backbone)(pretrained, norm_layer=norm_layer)
        m_out_sz = model.fc.in_features
        self.use_aux = use_aux
        self.deploy = deploy

        # Stem = the first four children of the backbone, with the first one
        # swapped for a new conv when the input is not 3-channel.
        stem = list(model.children())[:4]
        if in_channels != 3:
            stem[0] = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.initial = nn.Sequential(*stem)

        self.layer1 = model.layer1
        self.layer2 = model.layer2
        self.layer3 = model.layer3
        self.layer4 = model.layer4

        self.master_branch = nn.Sequential(
            _PSPModule(m_out_sz, bin_sizes=[1, 2, 3, 6], norm_layer=norm_layer, deploy=self.deploy),
            nn.Conv2d(m_out_sz // 4, num_classes, kernel_size=1)
        )

        # NOTE(review): assumes layer3 emits m_out_sz // 2 channels, as in
        # torchvision-style ResNets -- confirm for the project's backbone.
        self.auxiliary_branch = nn.Sequential(
            RepConv(m_out_sz // 2, m_out_sz // 4, kernel_size=3, padding=1, bias=False, deploy=self.deploy),
            norm_layer(m_out_sz // 4),
            nn.ReLU(inplace=True),
            nn.Dropout2d(0.1),
            nn.Conv2d(m_out_sz // 4, num_classes, kernel_size=1)
        )

        initialize_weights(self.master_branch, self.auxiliary_branch)
        if freeze_bn:
            self.freeze_bn()
        if freeze_backbone:
            set_trainable([self.initial, self.layer1, self.layer2, self.layer3, self.layer4], False)

    @staticmethod
    def _upsample(logits, size):
        """Bilinearly resize ``logits`` to the spatial ``size``."""
        # align_corners=False is what F.interpolate uses for bilinear mode
        # when the argument is omitted; it is spelled out here to make the
        # choice explicit. interpolate(size=...) already returns exactly
        # `size`, so no crop is needed afterwards.
        return F.interpolate(logits, size=size, mode='bilinear', align_corners=False)

    def forward(self, x):
        """Return per-pixel class scores at the input resolution.

        Returns:
            ``(output, aux)`` when the module is in training mode and
            ``use_aux`` is set, otherwise ``output`` alone.
        """
        input_size = (x.size()[2], x.size()[3])
        x = self.initial(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x_aux = self.layer3(x)
        x = self.layer4(x_aux)

        output = self._upsample(self.master_branch(x), input_size)

        if self.training and self.use_aux:
            aux = self._upsample(self.auxiliary_branch(x_aux), input_size)
            return output, aux
        return output

    def get_backbone_params(self):
        """Iterate over the parameters of the stem and the four residual stages."""
        return chain(self.initial.parameters(), self.layer1.parameters(), self.layer2.parameters(),
                     self.layer3.parameters(), self.layer4.parameters())

    def get_decoder_params(self):
        """Iterate over the parameters of the master and auxiliary branches."""
        return chain(self.master_branch.parameters(), self.auxiliary_branch.parameters())

    def freeze_bn(self):
        """Switch every ``nn.BatchNorm2d`` submodule to eval mode."""
        # NOTE(review): a later .train() call on the model puts these layers
        # back into training mode; callers that rely on frozen statistics
        # presumably re-invoke this afterwards -- confirm in the trainer.
        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                module.eval()

可用于替换的RepConv列表如下：

# Registry mapping the `repconv` option to a replacement-convolution class.
# Branch compositions below are taken from the article's own listing.
RepConv_dict = {
    'acb': ACB,  # 1x3 + 3x1 + 3x3
    'dbb': DBB,  # k_origin + k_1x1 + k_1x1_kxk_merged + k_1x1_avg_merged
    'dbb2': DBB2,  # k_origin + k_1x1 + k_1x1_kxk_merged
    'dbb3': DBB3,  # k_origin + k_1x1 + k_1x1_kxk_merged + identity
    'dbb4': DBB4,  # k_origin + k_1x1 + k_1x1_kxk_merged + k_1x1_avg_merged + identity
    'repvgg': RepVGG  # 1x1 + 3x3 + identity
}

其中ACB、DBB和RepVGG为分类网络中的替代卷积核，对其源码感兴趣的可以参考链接。而DBB2、DBB3、DBB4则是我基于DBB和ACB的基本支路进行的一些改造，其组成如下：

repvgg: 1x1 + 3x3 + identity
dbb:    k_origin + k_1x1 + k_1x1_kxk_merged + k_1x1_avg_merged
dbb2:   k_origin + k_1x1 + k_1x1_kxk_merged
dbb3:   k_origin + k_1x1 + k_1x1_kxk_merged + identity
dbb4:   k_origin + k_1x1 + k_1x1_kxk_merged + k_1x1_avg_merged + identity
acb:    1x3 + 3x1 + 3x3

具体细节可以参考我的github代码。其使用方法也很简单，在配置文件中选择自己的网络参数：

{
  "name": "PSPNet",         // training session name
  "n_gpu": 1,               // number of GPUs to use for training.
  "use_synch_bn": true,     // Using Synchronized batchnorm (for multi-GPU usage)

    "arch": {
        "type": "PSPNet",   // name of model architecture to train
        "args": {
            "backbone": "resnet50",     // encoder type
            "freeze_bn": false,         // When fine tuning the model this can be used
            "freeze_backbone": false,   // In this case only the decoder is trained
            "repconv": "repvgg"         // selecting the RepConv
        }
    },

    "train_loader": {
        "type": "VOC",          // Selecting data loader
        "args":{
            "data_dir": "data/",  // dataset path
            "batch_size": 32,     // batch size
            "augment": true,      // Use data augmentation
            "crop_size": 380,     // Size of the random crop after rescaling
            "shuffle": true,
            "base_size": 400,     // The image is resized to base_size, then randomly cropped
            "scale": true,        // Random rescaling between 0.5 and 2 before cropping
            "flip": true,         // Random H-Flip
            "rotate": true,       // Random rotation between 10 and -10 degrees
            "blur": true,         // Adding a slight amount of blur to the image
            "split": "train_aug", // Split to use, depends on the dataset
            "num_workers": 8
        }
    },

    "val_loader": {     // Same for val, but no data augmentation, only a center crop
        "type": "VOC",
        "args":{
            "data_dir": "data/",
            "batch_size": 32,
            "crop_size": 480,
            "val": true,
            "split": "val",
            "num_workers": 4
        }
    },

    "optimizer": {
        "type": "SGD",
        "differential_lr": true,      // Using lr/10 for the backbone, and lr for the rest
        "args":{
            "lr": 0.01,               // Learning rate
            "weight_decay": 1e-4,     // Weight decay
            "momentum": 0.9
        }
    },

    "loss": "CrossEntropyLoss2d",     // Loss (see utils/losses.py)
    "ignore_index": 255,              // Class to ignore (must be set to -1 for the ADE20K dataset)
    "lr_scheduler": {
        "type": "Poly",               // Learning rate scheduler (Poly or OneCycle)
        "args": {}
    },

    "trainer": {
        "epochs": 80,                 // Number of training epochs
        "save_dir": "saved/",         // Checkpoints are saved in save_dir/models/
        "save_period": 10,            // Save a checkpoint every 10 epochs

        "monitor": "max Mean_IoU",    // Mode and metric for model performance
        "early_stop": 10,             // Number of epochs to wait before early stopping (0 to disable)

        "tensorboard": true,        // Enable tensorboard visualization
        "log_dir": "saved/runs",
        "log_per_iter": 20,

        "val": true,
        "val_per_epochs": 5         // Run validation every 5 epochs
    }
}

其中，可选择的arch包括：

FCN8, UNet, UNetResnet, SegNet, SegResNet, ENet, RepENet, GCN, UperNet, PSPNet, PSPDenseNet, RepPSP, RepPSPDense, DeepLab, RepDeepLab, DeepLab_DUC_HDC, RepDUCHDC