Global Convolution Network理解

自从VGG网络的出现,大卷积核逐渐被人们遗忘。人们更喜欢堆叠小卷积核来采样特征图。他们认为这样做可以在保证感受野不变的情况下减少参数,从而提升整个网络的效率。
大卷积真的那么狼狈不堪么?如果觉得他参数多的话咱们可以引入可分离卷积!不要一步卷积,分两步走。这样做的话参数可以减小到原来的1/C ,C为输入通道数。
GCN原文觉得这是一个涉及到分类定位的问题。这篇文章巧妙的在中间找到了一个平衡点。他把之前的经典分割网络分为了这2类

分类派:AlexNet,VGG Net, GoogleNet or ResNet

定位派: FCN, U-Net , DeepLab,Deconv Net

首先先附上代码

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.utils.model_zoo as model_zoo
from torchvision import models
import  cfg
import math


class GCN(nn.Module):
    """Global Convolution Network block.

    Approximates a large ks x ks convolution with two separable branches
    (ks x 1 followed by 1 x ks, and 1 x ks followed by ks x 1) whose
    outputs are summed, cutting the parameter count from O(ks^2) to O(ks).

    Args:
        inplanes: number of input channels.
        planes: number of output channels.
        ks: kernel size of the emulated large convolution (default 7).
    """

    def __init__(self, inplanes, planes, ks=7):
        super(GCN, self).__init__()
        pad = ks // 2  # "same" padding so spatial size is preserved

        # Left branch: vertical strip conv, then horizontal strip conv.
        self.conv_l1 = nn.Conv2d(inplanes, planes, kernel_size=(ks, 1),
                                 padding=(pad, 0))
        self.conv_l2 = nn.Conv2d(planes, planes, kernel_size=(1, ks),
                                 padding=(0, pad))
        # Right branch: horizontal strip conv, then vertical strip conv.
        self.conv_r1 = nn.Conv2d(inplanes, planes, kernel_size=(1, ks),
                                 padding=(0, pad))
        self.conv_r2 = nn.Conv2d(planes, planes, kernel_size=(ks, 1),
                                 padding=(pad, 0))

    def forward(self, x):
        """Return the element-wise sum of both separable branches."""
        left = self.conv_l2(self.conv_l1(x))
        right = self.conv_r2(self.conv_r1(x))
        return left + right


class Refine(nn.Module):
    """Boundary Refinement block: a pre-activation residual unit
    (BN -> ReLU -> Conv -> BN -> ReLU -> Conv) added back to its input,
    used to sharpen object boundaries. Channel count is preserved.

    Bug fix vs. the original: the original applied the SAME BatchNorm
    module (`self.bn`) at both normalization points, so one set of
    affine parameters and running statistics was shared between two
    different feature distributions. Each position now has its own
    BatchNorm (`bn` and `bn2`).

    Args:
        planes: number of input/output channels.
    """

    def __init__(self, planes):
        super(Refine, self).__init__()
        self.bn = nn.BatchNorm2d(planes)
        self.bn2 = nn.BatchNorm2d(planes)  # second, independent BatchNorm
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)

    def forward(self, x):
        residual = x
        x = self.bn(x)
        x = self.relu(x)
        x = self.conv1(x)
        x = self.bn2(x)  # was self.bn in the original (shared-stats bug)
        x = self.relu(x)
        x = self.conv2(x)

        # Residual connection around the two-conv refinement path.
        out = residual + x
        return out


class FCN(nn.Module):
    """GCN segmentation network on a ResNet-50 backbone.

    (Named FCN in the original article to distinguish it from the small
    GCN block above; it is not the classic fully-convolutional network.)

    Five GCN heads project backbone feature maps to `num_classes`
    channels; ten Refine blocks sharpen boundaries; FPN-style top-down
    fusion with bilinear upsampling restores full input resolution.

    Bug fix vs. the original: `F.upsample_bilinear` is deprecated (and
    removed in recent PyTorch); replaced with the equivalent
    `F.interpolate(..., mode='bilinear', align_corners=True)`.

    Args:
        num_classes: number of segmentation classes (output channels).
    """

    def __init__(self, num_classes):
        super(FCN, self).__init__()

        self.num_classes = num_classes

        # ImageNet-pretrained ResNet-50 as the downsampling backbone.
        resnet = models.resnet50(pretrained=True)

        self.conv1 = resnet.conv1
        self.bn0 = resnet.bn1
        self.relu = resnet.relu
        self.maxpool = resnet.maxpool

        self.layer1 = resnet.layer1
        self.layer2 = resnet.layer2
        self.layer3 = resnet.layer3
        self.layer4 = resnet.layer4

        # One GCN head per backbone stage (channel counts are ResNet-50's).
        self.gcn1 = GCN(2048, self.num_classes)
        self.gcn2 = GCN(1024, self.num_classes)
        self.gcn3 = GCN(512, self.num_classes)
        self.gcn4 = GCN(64, self.num_classes)
        self.gcn5 = GCN(64, self.num_classes)

        # Boundary refinement: one after each GCN head (1-5) and one
        # after each fusion step (6-10).
        self.refine1 = Refine(self.num_classes)
        self.refine2 = Refine(self.num_classes)
        self.refine3 = Refine(self.num_classes)
        self.refine4 = Refine(self.num_classes)
        self.refine5 = Refine(self.num_classes)
        self.refine6 = Refine(self.num_classes)
        self.refine7 = Refine(self.num_classes)
        self.refine8 = Refine(self.num_classes)
        self.refine9 = Refine(self.num_classes)
        self.refine10 = Refine(self.num_classes)

        #self.out0 = self._classifier(2048)
        #self.out1 = self._classifier(1024)
        #self.out2 = self._classifier(512)
        #self.out_e = self._classifier(256)
        #self.out3 = self._classifier(64)
        #self.out4 = self._classifier(64)
        #self.out5 = self._classifier(32)

        #self.transformer = nn.Conv2d(256, 64, kernel_size=1)

    def _classifier(self, inplanes):
        """Build a small conv classifier head mapping `inplanes` channels
        to `num_classes` channels (kept for the commented-out heads above)."""
        return nn.Sequential(
            nn.Conv2d(inplanes, inplanes//2, 3, padding=1, bias=False),
            nn.BatchNorm2d(inplanes//2),
            nn.ReLU(inplace=True),
            nn.Dropout(.1),
            nn.Conv2d(inplanes//2, self.num_classes, 1),
        )

    @staticmethod
    def _upsample(x, size):
        """Bilinear upsample `x` to spatial `size`.

        Replaces the deprecated F.upsample_bilinear (which used
        align_corners=True semantics) with F.interpolate.
        """
        return F.interpolate(x, size=size, mode='bilinear',
                             align_corners=True)

    def forward(self, x):
        input = x
        # Backbone stem: conv -> BN -> ReLU (1/2), then maxpool (1/4).
        x = self.conv1(x)
        x = self.bn0(x)
        x = self.relu(x)
        conv_x = x
        x = self.maxpool(x)
        pool_x = x

        # Backbone stages: fm1 (1/4), fm2 (1/8), fm3 (1/16), fm4 (1/32).
        fm1 = self.layer1(x)
        fm2 = self.layer2(fm1)
        fm3 = self.layer3(fm2)
        fm4 = self.layer4(fm3)

        # GCN head + boundary refinement on each scale.
        gcfm1 = self.refine1(self.gcn1(fm4))
        gcfm2 = self.refine2(self.gcn2(fm3))
        gcfm3 = self.refine3(self.gcn3(fm2))
        gcfm4 = self.refine4(self.gcn4(pool_x))
        gcfm5 = self.refine5(self.gcn5(conv_x))

        # FPN-style top-down fusion: upsample, add skip, refine.
        fs1 = self.refine6(self._upsample(gcfm1, fm3.size()[2:]) + gcfm2)
        fs2 = self.refine7(self._upsample(fs1, fm2.size()[2:]) + gcfm3)
        fs3 = self.refine8(self._upsample(fs2, pool_x.size()[2:]) + gcfm4)
        fs4 = self.refine9(self._upsample(fs3, conv_x.size()[2:]) + gcfm5)
        out = self.refine10(self._upsample(fs4, input.size()[2:]))

        return out

# if __name__ == "__main__": #验证是否端到端输出 这里选用的Camvid数据集所以有12个类别!
#     import torch as t
#     rgb = t.randn(4, 3, 352, 480)   #input
# 
#     net = FCN(12)
# 
#     out = net(rgb)
# 
#     print(out.shape)


然后可视化一下结构,这里是我手画的有点乱但是依然梳理得清楚
在这里插入图片描述左上部分gcn小模块,总共会有5个。左下部分是Refine模块(用于改善边界)总共会有10个这样的小模块。右图是GCN的大框架,这里为了区分gcn小模块的名字,所以我把整个大框架命名为FCN(不是全卷积网络)!!!

与论文原文不同的是,框架整体到最后多做了一次和输入X的融合,所以一共做了5次融合。融合方式是FPN(通道数一样,尺寸大小一样)

实验部分

这里的下采样部分采用的是Resnet50在ImageNet预训练好的模块,我们一共做了4次下采样,所以最小的特征图为输入的1/2⁴ = 1/16。当然你也可以下采样到1/8就停止。在这里上采样(恢复分辨率)采用的是简单粗暴的双线性插值放大而不是反卷积(原文用的是),因为反卷积的话会加大计算量,故不采用。

这次先在相对简单的Camvid数据集上做实验,如果连Camvid都搞不定,其他的数据集更搞不定…实验设备为NVIDIA Geforce RTX 3070(8G)

跑了20个epoch,batchsize设置为2(再大就要爆显存了)

在这里插入图片描述在这里插入图片描述
显然测试集miou还没收敛,但是验证集已经开始收敛了!!!最高val miou为63%
我后来会在Pascal_vol 2012 和 Cityscapes上测试的,验证其泛化性。

GCN论文原址:https://arxiv.org/abs/1703.02719

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值