【飞桨】基于paddlepaddle的图像分割FCN8s的python实现

【飞桨】基于paddlepaddle的图像分割FCN8s的python实现

前言

前几天参加了飞桨深度学习学院的图像分割7日打卡营,学习了图像分割的有关网络,第三节课讲解了FCN全卷积网络,在此基于paddlepaddle框架实现FCN8s网络。

FCN网络

在这里插入图片描述
FCN全卷积网络用于图像分割,与图像分类网络不同,它没有全连接层。
在这里插入图片描述
FCN网络用卷积层取代了分类网络中的全连接层,并通过1×1卷积将通道数变为图像分割的类别数num_classes。
在这里插入图片描述
将1×1卷积得到的feature map通过上采样,得到尺寸与输入图像一致、通道数为类别数num_classes的tensor,在通道(num_classes)维度上进行softmax,然后取argmax,从而对每一个像素进行分类。

在这里插入图片描述

在这里插入图片描述

根据所融合的前层特征信息的不同,FCN网络分为FCN8s、FCN16s和FCN32s,其中FCN8s的效果相对较好,以下对FCN8s网络进行实现。

FCN8s实现

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable
from paddle.fluid.dygraph import Conv2D
from paddle.fluid.dygraph import Conv2DTranspose
from paddle.fluid.dygraph import Dropout
from paddle.fluid.dygraph import BatchNorm
from paddle.fluid.dygraph import Pool2D
from paddle.fluid.dygraph import Linear
from vgg import VGG16BN

class FCN8s(fluid.dygraph.Layer):
    """FCN-8s segmentation network assembled on top of a ``VGG16BN`` backbone.

    The five backbone stages (``layer1`` .. ``layer5``) are each followed by a
    2x2, stride-2 pooling layer.  Two convolutions (``fc6``/``fc7``) with
    dropout come next, then 1x1 "score" convolutions that map feature maps to
    ``num_classes`` channels.  Scores taken after ``pool4`` and ``pool3`` are
    cropped and added to the upsampled coarser scores before the final
    stride-8 transposed convolution.

    Args:
        num_classes: Number of channels produced by the score convolutions
            and carried through the transposed convolutions.
    """

    def __init__(self, num_classes=59):
        super(FCN8s, self).__init__()

        def make_pool():
            # Every pooling stage uses the same configuration.
            return Pool2D(pool_size=2, pool_stride=2, ceil_mode=True)

        def make_upsample(filter_size, stride):
            # Bias-free transposed convolution that keeps the channel count.
            return Conv2DTranspose(
                num_channels=num_classes,
                num_filters=num_classes,
                filter_size=filter_size,
                stride=stride,
                bias_attr=False,
            )

        vgg = VGG16BN(pretrained=False)

        # Backbone stages, each followed by its own pooling layer.
        self.layer1 = vgg.layer1
        # Override the padding of the very first convolution to 100.
        # NOTE(review): this writes a private attribute of the backbone's
        # conv layer; presumably it provides the border that the fixed crop
        # offsets in forward() cut away -- confirm against vgg.py.
        self.layer1[0].conv._padding = [100, 100]
        self.pool1 = make_pool()
        self.layer2 = vgg.layer2
        self.pool2 = make_pool()
        self.layer3 = vgg.layer3
        self.pool3 = make_pool()
        self.layer4 = vgg.layer4
        self.pool4 = make_pool()
        self.layer5 = vgg.layer5
        self.pool5 = make_pool()

        # Convolutional stand-ins for the classifier head (7x7, then 1x1).
        self.fc6 = Conv2D(num_channels=512, num_filters=4096, filter_size=7,
                          act="relu")
        self.fc7 = Conv2D(num_channels=4096, num_filters=4096, filter_size=1,
                          act="relu")
        self.drop6 = Dropout()
        self.drop7 = Dropout()

        # 1x1 convolutions producing per-class scores at three depths.
        self.score = Conv2D(num_channels=4096, num_filters=num_classes,
                            filter_size=1)
        self.score_pool3 = Conv2D(num_channels=256, num_filters=num_classes,
                                  filter_size=1)
        self.score_pool4 = Conv2D(num_channels=512, num_filters=num_classes,
                                  filter_size=1)

        # Upsampling layers: two stride-2 steps and one final stride-8 step.
        self.up_output = make_upsample(filter_size=4, stride=2)
        self.up_pool4 = make_upsample(filter_size=4, stride=2)
        self.up_fianl = make_upsample(filter_size=16, stride=8)

    @staticmethod
    def _crop(tensor, offset, height, width):
        """Slice a ``height`` x ``width`` window starting at ``offset`` on both spatial axes."""
        return tensor[:, :, offset:offset + height, offset:offset + width]

    def forward(self, inputs):
        """Run the network and return scores cropped to the input's height and width.

        Args:
            inputs: 4-D input batch; axes 2 and 3 are used as the spatial
                size for the final crop.

        Returns:
            The output of the last transposed convolution, cropped with
            offset 31 to ``inputs.shape[2]`` x ``inputs.shape[3]``.
        """
        # Backbone: remember the features after pool3 and pool4 for the skips.
        features = self.pool1(self.layer1(inputs))
        features = self.pool2(self.layer2(features))
        pool3_features = self.pool3(self.layer3(features))
        pool4_features = self.pool4(self.layer4(pool3_features))
        features = self.pool5(self.layer5(pool4_features))

        # Head: fc6 -> dropout -> fc7 -> dropout.
        features = self.drop6(self.fc6(features))
        features = self.drop7(self.fc7(features))

        # Coarsest scores, upsampled by 2.
        coarse_scores = self.up_output(self.score(features))

        # Skip connection from pool4: crop to the coarse map, add, upsample by 2.
        pool4_scores = self._crop(self.score_pool4(pool4_features), 5,
                                  coarse_scores.shape[2],
                                  coarse_scores.shape[3])
        fused_scores = self.up_pool4(pool4_scores + coarse_scores)

        # Skip connection from pool3: crop to the fused map and add.
        pool3_scores = self._crop(self.score_pool3(pool3_features), 9,
                                  fused_scores.shape[2],
                                  fused_scores.shape[3])

        # Final stride-8 upsampling, then crop back to the input resolution.
        full_scores = self.up_fianl(pool3_scores + fused_scores)
        return self._crop(full_scores, 31, inputs.shape[2], inputs.shape[3])


def main():
    """Push one random batch through FCN8s in eval mode and print the output shape."""
    batch_shape = (2, 3, 512, 512)
    with fluid.dygraph.guard():
        images = to_variable(np.random.rand(*batch_shape).astype(np.float32))
        net = FCN8s(num_classes=59)
        net.eval()
        print(net(images).shape)


# Run the smoke test only when executed as a script.
if __name__ == "__main__":
    main()

在百度AI Studio中运行FCN8s网络,结果如下:
在这里插入图片描述
该网络将一个形状为(2,3,512,512)的张量转换为形状为(2,59,512,512)的张量,其通道数变为了分类数num_classes=59。由于FCN8s的前几层卷积网络与VGG16的相同,因此直接使用VGG16(代码中为带BatchNorm的VGG16BN)作为backbone。具体的训练网络的代码和VGG的代码见百度图像分割7日打卡营Class2。

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值