DeepLabv3+ Series: The ResNet Backbone Network

A code implementation of the ResNet backbone for the DeepLabv3+ series (covering ResNet18, ResNet34, ResNet50, ResNet101, and ResNet152).

Built with PaddlePaddle 2.0. PaddlePaddle 2.0 was released recently and is quite pleasant to use, so I took code written for other versions as a reference and reimplemented the DeepLabv3+ ResNet network on PaddlePaddle 2.0. This article covers the backbone network; for the DeepLabv3 head, see the next article in the series: DeepLabv3+ Series: Building the DeepLabv3 Network.
Original paper: Deep Residual Learning for Image Recognition
PaddlePaddle 2.0 installation guide

1. Network Structure Diagram:

2. Directly Runnable Code:

import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
def SyncBatchNorm(*args, **kwargs):
    """一个是cpu情况下的归一化,一个是GPU情况下的归一化"""
    if paddle.get_device() == 'cpu':
        return nn.BatchNorm2D(*args, **kwargs)
    else:
        return nn.SyncBatchNorm(*args, **kwargs)

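# ConvBNLayer: Conv2D + (Sync)BatchNorm with an optional ReLU, the building
# block shared by every residual block below.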
class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 groups=1,
                 act=None,
                 dilation=1,
                 padding=None,
                 name=None):
        super(ConvBNLayer, self).__init__(name)

        if padding is None:
            padding = (kernel_size - 1) // 2

        self.conv = nn.Conv2D(in_channels=in_channels,
                            out_channels=out_channels,
                            kernel_size=kernel_size,
                            stride=stride,
                            padding=padding,
                            groups=groups,
                            dilation=dilation,
                            bias_attr=False)
        self.bn = SyncBatchNorm(out_channels)
        self.act = act
        self._act_op = nn.ReLU()

    def forward(self, inputs):
        y = self.conv(inputs)
        y = self.bn(y)
        if self.act == 'relu':
            y = self._act_op(y)
        return y

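# BasicBlock: two 3x3 convs plus a residual shortcut (ResNet18/34).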
class BasicBlock(nn.Layer):
    expansion = 1  # expand ratio for last conv output channel in each block
    def __init__(self,
                 in_channels,
                 out_channels,
                 dilation=1,
                 stride=1,
                 padding=None,
                 shortcut=True,
                 name=None):
        super(BasicBlock, self).__init__(name)
        
        # Pass dilation/padding through to the convs (the original accepted
        # them but silently ignored them, so dilated stages had no effect
        # for ResNet18/34).
        self.conv0 = ConvBNLayer(in_channels=in_channels,
                                 out_channels=out_channels,
                                 kernel_size=3,
                                 stride=stride,
                                 dilation=dilation,
                                 padding=padding,
                                 act='relu',
                                 name=name)
        self.conv1 = ConvBNLayer(in_channels=out_channels,
                                 out_channels=out_channels,
                                 kernel_size=3,
                                 dilation=dilation,
                                 padding=padding,
                                 act=None,
                                 name=name)
        if not shortcut:
            self.short = ConvBNLayer(in_channels=in_channels,
                                     out_channels=out_channels,
                                     kernel_size=1,
                                     stride=stride,
                                     act=None,
                                     name=name)
        self.shortcut = shortcut

    def forward(self, inputs):
        conv0 = self.conv0(inputs)
        conv1 = self.conv1(conv0)
        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        # Element-wise addition of the shortcut branch and the residual branch.
        y = paddle.add(x=short, y=conv1)
        y = F.relu(y)
        return y

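# BottleneckBlock: 1x1 -> 3x3 -> 1x1 convs plus a residual shortcut; the last
# conv expands channels 4x (ResNet50/101/152).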
class BottleneckBlock(nn.Layer):
    expansion = 4
    def __init__(self,
                 in_channels,
                 out_channels,
                 stride=1,
                 shortcut=True,
                 dilation=1,
                 padding=None,
                 name=None):
        super(BottleneckBlock, self).__init__(name)

        self.conv0 = ConvBNLayer(in_channels=in_channels,
                                 out_channels=out_channels,
                                 kernel_size=1,
                                 act='relu')
        self.conv1 = ConvBNLayer(in_channels=out_channels,
                                 out_channels=out_channels,
                                 kernel_size=3,
                                 stride=stride,
                                 padding=padding,
                                 act='relu',
                                 dilation=dilation)
        self.conv2 = ConvBNLayer(in_channels=out_channels,
                                 out_channels=out_channels * 4,
                                 kernel_size=1,
                                 stride=1)
        if not shortcut:
            self.short = ConvBNLayer(in_channels=in_channels,
                                     out_channels=out_channels * 4,
                                     kernel_size=1,
                                     stride=stride)
        self.shortcut = shortcut
        self.num_channel_out = out_channels * 4

    def forward(self, inputs):
        conv0 = self.conv0(inputs)
        conv1 = self.conv1(conv0)
        conv2 = self.conv2(conv1)
        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        # Element-wise addition of the shortcut branch and the residual branch.
        y = paddle.add(x=short, y=conv2)
        y = F.relu(y)
        return y


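# Dilated ResNet: layer3 and layer4 use stride 1 with dilation, so the final
# feature map stays at 1/8 input resolution (output stride 8), as DeepLabv3+
# expects.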
class ResNet(nn.Layer):
    def __init__(self, layers=50, num_classes=1000, multi_grid=[1, 2, 4], duplicate_blocks=False):
        super(ResNet, self).__init__()
        self.layers = layers
        supported_layers = [18, 34, 50, 101, 152]
        assert layers in supported_layers
        mgr = [1, 2, 4] # multi grid rate for duplicated blocks

        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers == 34:
            depth = [3, 4, 6, 3]
        elif layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]

        if layers < 50:
            in_channels = [64, 64, 128, 256, 512]
        else:
            in_channels = [64, 256, 512, 1024, 2048]

        self.out_channels = [64, 128, 256, 512]

        self.conv = ConvBNLayer(in_channels=3,
                                out_channels=64,
                                kernel_size=7,
                                stride=2,
                                act='relu')
        self.pool2d_max = nn.MaxPool2D(
                                kernel_size=3,
                                stride=2,
                                padding=1)
        if layers < 50:
            block = BasicBlock
            l1_shortcut=True
        else:
            block = BottleneckBlock
            l1_shortcut=False
        
        self.layer1 = nn.Sequential(
                *self.make_layer(block,
                                 in_channels[0],
                                 self.out_channels[0],
                                 depth[0],
                                 stride=1,
                                 shortcut=l1_shortcut,
                                 name='layer1'))
        self.layer2 = nn.Sequential(
                *self.make_layer(block,
                                 in_channels[1],
                                 self.out_channels[1],
                                 depth[1],
                                 stride=2,
                                 name='layer2'))
        self.layer3 = nn.Sequential(
                *self.make_layer(block,
                                 in_channels[2],
                                 self.out_channels[2],
                                 depth[2],
                                 stride=1,
                                 dilation=2,
                                 name='layer3'))
        # add multi grid [1, 2, 4]      
        self.layer4 = nn.Sequential(
                *self.make_layer(block,
                                 in_channels[3],
                                 self.out_channels[3],
                                 depth[3],
                                 stride=1,
                                 name='layer4',
                                 dilation=multi_grid))

        if duplicate_blocks:
            self.layer5 = nn.Sequential(
                    *self.make_layer(block,
                                     in_channels[4],
                                     self.out_channels[3],
                                     depth[3],
                                     stride=1,
                                     name='layer5',
                                     dilation=[x*mgr[0] for x in multi_grid]))
            self.layer6 = nn.Sequential(
                    *self.make_layer(block,
                                     in_channels[4],
                                     self.out_channels[3],
                                     depth[3],
                                     stride=1,
                                     name='layer6',
                                     dilation=[x*mgr[1] for x in multi_grid]))
            self.layer7 = nn.Sequential(
                    *self.make_layer(block,
                                     in_channels[4],
                                     self.out_channels[3],
                                     depth[3],
                                     stride=1,
                                     name='layer7',
                                     dilation=[x*mgr[2] for x in multi_grid]))
        self.last_pool = nn.AdaptiveAvgPool2D(output_size=(1, 1))  # adaptive average pooling

        self.fc = nn.Linear(in_features=self.out_channels[-1] * block.expansion,
                            out_features=num_classes)

        self.out_dim = self.out_channels[-1] * block.expansion


    def forward(self, inputs):
        x = self.conv(inputs)
        x = self.pool2d_max(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.last_pool(x)
        x = paddle.reshape(x, shape=[-1, self.out_dim])
        x = self.fc(x)
        return x

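    # Build one residual stage; `dilation` may be a single int or, for
    # multi-grid, a list of three rates applied block by block.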
    def make_layer(self, block, in_channels, out_channels, depth, stride, dilation=1, shortcut=False, name=None):
        layers = []
        if isinstance(dilation, int):
            dilation = [dilation] * depth
        elif isinstance(dilation, (list, tuple)):
            assert len(dilation) == 3, "Wrong dilation rate for multi-grid | len should be 3"
            # Cycle the multi-grid rates so that stages shallower than 3 blocks
            # (e.g. the last stage of ResNet18/34) still work.
            dilation = [dilation[i % 3] for i in range(depth)]
        
        padding = []
        for di in dilation:
            if di>1:
                padding.append(di)
            else:
                padding.append(None)

        layers.append(block(in_channels,
                            out_channels,
                            stride=stride,
                            shortcut=shortcut,
                            dilation=dilation[0],
                            padding=padding[0],
                            name=f'{name}.0'))
        for i in range(1, depth):
            layers.append(block(out_channels * block.expansion,
                                out_channels,
                                stride=1,
                                dilation=dilation[i],
                                padding=padding[i],
                                name=f'{name}.{i}'))
        return layers

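# Convenience constructors for each supported depth.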
def ResNet18(**args):
    return ResNet(layers=18, **args)

def ResNet34(**args):
    return ResNet(layers=34, **args)

def ResNet50(**args):
    return ResNet(layers=50, **args)

def ResNet101(**args):
    return ResNet(layers=101, **args)

def ResNet152(**args):
    return ResNet(layers=152, **args)

def main():
    x_data = np.random.rand(2, 3, 224, 224).astype(np.float32)
    x = paddle.to_tensor(x_data)  # to_variable is deprecated in Paddle 2.0
    # ResNet101 here; the other depths work the same way.
    model = ResNet101()
    model.eval()
    pred = model(x)
    print('dilated resnet101: pred.shape =', pred.shape)

if __name__ == '__main__':
    main()
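Because layer3 and layer4 keep stride 1 and apply dilation instead of downsampling, the backbone's final feature map stays at 1/8 of the input resolution, which is what DeepLabv3+ expects. The following minimal sketch (my addition, assuming the classes above are in scope) runs the stem and the four stages directly, bypassing the pooling and fc head, to verify the output stride:

```python
# Verify the output stride of the dilated backbone (sketch, not part of the
# original listing).
model = ResNet50()
model.eval()
x = paddle.rand([2, 3, 224, 224])
feat = model.pool2d_max(model.conv(x))  # stem, stride 4: [2, 64, 56, 56]
feat = model.layer1(feat)               # stride 4:       [2, 256, 56, 56]
feat = model.layer2(feat)               # stride 8:       [2, 512, 28, 28]
feat = model.layer3(feat)               # stride 1 + dilation 2, still 28x28
feat = model.layer4(feat)               # stride 1 + multi-grid, still 28x28
print(feat.shape)                       # expected: [2, 2048, 28, 28]
```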

### Using ResNet50 as the DeepLabv3+ Backbone

When building a DeepLabv3+ model for semantic segmentation, choosing a suitable backbone network is critical for performance. If ResNet50 is chosen as the feature extractor, it can be integrated into the DeepLabv3+ architecture as follows.

#### Import the required libraries and load pretrained weights

To simplify development, a pretrained ResNet50 is usually imported from an official or other trusted source, and its parameters are frozen so the weights in those layers are not updated:

```python
from tensorflow.keras.applications import ResNet50

base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(None, None, 3))
for layer in base_model.layers:
    layer.trainable = False
```

#### Build the ASPP module and other components

Next, add the DeepLabv3+-specific design elements on top of the base model, such as the Atrous Spatial Pyramid Pooling (ASPP) layer and the other auxiliary branches. The concrete implementation depends on the framework version being used and on personal preference.

#### Connect the parts into the complete DeepLabv3+

The final step is to connect the prepared components into the target segmentation network: define the input tensor, pass the data flow through a series of convolution operations, and produce the prediction output.

```python
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, Conv2D, UpSampling2D, Concatenate

def build_deeplabv3_plus(input_shape=(512, 512, 3), num_classes=21):
    inputs = Input(shape=input_shape)

    # Load pre-trained ResNet50 model without top layers and set it non-trainable.
    backbone = ResNet50(include_top=False, weights="imagenet", input_tensor=inputs)

    # Extract feature maps from the last convolutional block of ResNet50.
    c5 = backbone.get_layer('conv5_block3_out').output

    # Implement ASPP module here...

    # Add decoder part including skip connections with lower level features...

    outputs = ...  # Define output layer according to your task.

    return Model(inputs=[inputs], outputs=[outputs])
```

This snippet shows how to instantiate a ResNet50-based DeepLabv3+ object. Note that "Implement ASPP module here..." and "Add decoder part including skip connections with lower level features..." must be replaced with actual code to complete the network structure; a hedged ASPP sketch follows below.
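For the ASPP placeholder above, here is a minimal sketch. It is an illustration under my own assumptions (the function name `aspp_block`, the rates 6/12/18, a static input shape, and TensorFlow 2.6+ for `keepdims`), not the reference implementation:

```python
import tensorflow as tf
from tensorflow.keras import layers

def aspp_block(x, filters=256, rates=(6, 12, 18)):
    # 1x1 conv branch.
    branches = [layers.Conv2D(filters, 1, padding='same', activation='relu')(x)]
    # Parallel atrous 3x3 conv branches at the given dilation rates.
    for r in rates:
        branches.append(layers.Conv2D(filters, 3, padding='same',
                                      dilation_rate=r, activation='relu')(x))
    # Image-level branch: global average pool, 1x1 conv, bilinear upsample back.
    # Assumes a static input shape so x.shape[1:3] are known integers.
    pool = layers.GlobalAveragePooling2D(keepdims=True)(x)  # TF >= 2.6
    pool = layers.Conv2D(filters, 1, activation='relu')(pool)
    pool = layers.UpSampling2D(size=(x.shape[1], x.shape[2]),
                               interpolation='bilinear')(pool)
    branches.append(pool)
    # Fuse all branches with a final 1x1 projection.
    y = layers.Concatenate()(branches)
    return layers.Conv2D(filters, 1, activation='relu')(y)
```

Inside `build_deeplabv3_plus`, this would replace the ASPP placeholder, e.g. `y = aspp_block(c5)`, before the decoder and output layers.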