使用飞桨实现图像分割模型U-net的心得

xwz小王子

已于 2023-10-15 10:33:36 修改

阅读量687

点赞数

分类专栏：深度学习入门基础　文章标签：paddlepaddle 人工智能

于 2020-10-26 08:56:59 首次发布

本文链接：https://blog.csdn.net/weixin_44887311/article/details/109282737

版权

深度学习入门基础专栏收录该内容

30 篇文章 0 订阅

订阅专栏

使用飞桨实现图像分割模型U-net的心得
简介

最近参加了百度的图像分割打卡营，学习了如何使用飞桨实现图像分割模型U-net，收获很大。
U-net

U-net是一种经典的语义分割网络，采用编码器-解码器（Encoder-Decoder）结构，并通过跳跃连接把编码器各层的特征拼接（concat）到对应的解码器层。模型关键结构的比较见下图：
在这里插入图片描述

U-net Series 关键结构对比
模型结构图示比较如下所示：
在这里插入图片描述

模型结构图示比较
代码实现
class UNet(Layer):
    """U-Net segmentation network built from Encoder/Decoder blocks.

    Channel plan:
        encoder: 3 -> 64 -> 128 -> 256 -> 512
        mid:     512 -> 1024 -> 1024 (two 1x1 conv + bn + relu)
        decoder: 1024 -> 512 -> 256 -> 128 -> 64
        head:    64 -> num_classes (1x1 conv, no activation)

    Args:
        num_classes: number of output channels of the final 1x1 convolution.
    """

    def __init__(self, num_classes=59):
        super(UNet, self).__init__()

        # Four encoders; each one returns (features before pooling, pooled features).
        self.down1 = Encoder(num_channels=3, num_filters=64)
        self.down2 = Encoder(num_channels=64, num_filters=128)
        self.down3 = Encoder(num_channels=128, num_filters=256)
        self.down4 = Encoder(num_channels=256, num_filters=512)

        # Middle layers: two 1x1 conv + bn + relu.
        self.mid_conv1 = Conv2D(512, 1024, filter_size=1, padding=0, stride=1)
        self.mid_bn1 = BatchNorm(1024, act='relu')
        self.mid_conv2 = Conv2D(1024, 1024, filter_size=1, padding=0, stride=1)
        self.mid_bn2 = BatchNorm(1024, act='relu')

        # Four decoders, numbered to mirror the encoder they take the skip from.
        self.up4 = Decoder(1024, 512)
        self.up3 = Decoder(512, 256)
        self.up2 = Decoder(256, 128)
        self.up1 = Decoder(128, 64)

        # Per-pixel classification head.
        self.last_conv = Conv2D(num_channels=64, num_filters=num_classes, filter_size=1)

    def forward(self, inputs):
        """Run the network on ``inputs`` and return the output of the 1x1 head.

        ``inputs`` must have 3 channels (see ``down1``); the Decoder indexes
        shape[2]/shape[3] as H/W, i.e. NCHW layout. Shapes are printed at every
        stage as a debugging aid.
        """
        # Encoder path: x1..x4 are the pre-pool skip features, x is the pooled map.
        x1, x = self.down1(inputs)
        print(x1.shape, x.shape)
        x2, x = self.down2(x)
        print(x2.shape, x.shape)
        x3, x = self.down3(x)
        print(x3.shape, x.shape)
        x4, x = self.down4(x)
        print(x4.shape, x.shape)

        # middle layers
        x = self.mid_conv1(x)
        x = self.mid_bn1(x)
        x = self.mid_conv2(x)
        x = self.mid_bn2(x)

        # Decoder path: deepest skip feature first.
        print(x4.shape, x.shape)
        x = self.up4(x4, x)
        print(x3.shape, x.shape)
        x = self.up3(x3, x)
        print(x2.shape, x.shape)
        x = self.up2(x2, x)
        print(x1.shape, x.shape)
        x = self.up1(x1, x)
        print(x.shape)

        x = self.last_conv(x)

        return x

代码实现比较简单，得益于PaddlePaddle的简单易用。部分代码如上所示，完整代码见文末。

总结

U-net作为图像语义分割的一种经典模型具有结构清晰、效果好的优点。在飞桨中实现起来较容易，效果较好。百度图像分割七日打卡营收获很大，推荐大家学习。
参考文献：
完整代码：
from numpy.core.defchararray import decode, mod
import paddle
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable
from paddle.fluid.dygraph import Layer
from paddle.fluid.dygraph import Conv2D
from paddle.fluid.dygraph import BatchNorm
from paddle.fluid.dygraph import Pool2D
from paddle.fluid.dygraph import Conv2DTranspose

class Encoder(Layer):
    """U-Net encoder stage: two (3x3 conv + bn + relu) followed by a 2x2 max pool.

    Args:
        num_channels: number of input channels.
        num_filters: number of output channels of both convolutions.
    """

    def __init__(self, num_channels, num_filters):
        super(Encoder, self).__init__()
        # encoder contains:
        #   3x3 conv + bn + relu
        #   3x3 conv + bn + relu
        #   2x2 pool
        # forward returns the features before and after the pool
        self.conv1 = Conv2D(num_channels=num_channels,
                            num_filters=num_filters,
                            filter_size=3,
                            stride=1,
                            padding=1)  # a 3x3 conv with padding=1 keeps H and W unchanged
        self.bn1 = BatchNorm(num_filters, act='relu')

        self.conv2 = Conv2D(num_channels=num_filters,
                            num_filters=num_filters,
                            filter_size=3,
                            stride=1,
                            padding=1)
        self.bn2 = BatchNorm(num_filters, act='relu')

        # ceil_mode=True rounds odd sizes up (e.g. 123 -> 62 in the sample run).
        self.pool = Pool2D(pool_size=2, pool_stride=2, pool_type='max', ceil_mode=True)

    def forward(self, inputs):
        """Return ``(x, x_pooled)``: features before pooling and after pooling.

        ``x`` is the skip feature that the matching Decoder concatenates
        (the grey arrow in the U-Net diagram); ``x_pooled`` feeds the next stage.
        """
        x = self.conv1(inputs)
        x = self.bn1(x)
        x = self.conv2(x)
        x = self.bn2(x)  # skip feature used for the concat
        x_pooled = self.pool(x)

        return x, x_pooled

class Decoder(Layer):
    """U-Net decoder stage: 2x2 transpose conv, concat with the skip feature, two 3x3 convs.

    Args:
        num_channels: channels of the incoming (deeper) feature map. After the
            transpose conv halves them to ``num_filters`` and the skip feature
            (also expected to have ``num_filters`` channels) is concatenated,
            the first 3x3 conv again sees ``num_channels`` inputs.
        num_filters: number of output channels.
    """

    def __init__(self, num_channels, num_filters):
        super(Decoder, self).__init__()
        # decoder contains:
        #   2x2 transpose conv, stride=2, p=0 (makes feature map 2x larger)
        #   3x3 conv + bn + relu
        #   3x3 conv + bn + relu
        self.up = Conv2DTranspose(num_channels=num_channels,  # e.g. 1024 -> 512
                                  num_filters=num_filters,
                                  filter_size=2,
                                  stride=2)

        self.conv1 = Conv2D(num_channels=num_channels,  # e.g. 1024 after the concat
                            num_filters=num_filters,
                            filter_size=3,
                            stride=1,
                            padding=1)

        self.bn1 = BatchNorm(num_channels=num_filters, act='relu')

        self.conv2 = Conv2D(num_channels=num_filters,
                            num_filters=num_filters,
                            filter_size=3,
                            stride=1,
                            padding=1)

        self.bn2 = BatchNorm(num_channels=num_filters, act='relu')

    def forward(self, inputs_prev, inputs):
        """Upsample ``inputs``, align it with ``inputs_prev`` and fuse the two.

        Args:
            inputs_prev: skip feature from the matching encoder (pre-pool output).
            inputs: feature map coming from the deeper layer.

        Returns:
            The fused feature map after two conv + bn + relu layers.
        """
        # The original paper crops inputs_prev; here x is padded instead. The goal
        # is the same: make H and W match so the two maps can be concatenated.
        x = self.up(inputs)
        # NCHW
        h_diff = (inputs_prev.shape[2] - x.shape[2])
        w_diff = (inputs_prev.shape[3] - x.shape[3])
        # NOTE(review): because the encoder pools with ceil_mode=True, x can be one
        # pixel LARGER than inputs_prev (e.g. 32 vs 31), making the diffs negative.
        # The sample run shows matching shapes in that case, so pad2d presumably
        # crops on negative paddings -- confirm against the pad2d documentation.
        x = fluid.layers.pad2d(x, paddings=[h_diff//2, h_diff - h_diff//2, w_diff//2, w_diff - w_diff//2])
        # axis=1 is C in NCHW: concatenate along the channel axis
        x = fluid.layers.concat([inputs_prev, x], axis=1)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.conv2(x)
        x = self.bn2(x)
        return x

class UNet(Layer):
    """U-Net segmentation network built from Encoder/Decoder blocks.

    Channel plan:
        encoder: 3 -> 64 -> 128 -> 256 -> 512
        mid:     512 -> 1024 -> 1024 (two 1x1 conv + bn + relu)
        decoder: 1024 -> 512 -> 256 -> 128 -> 64
        head:    64 -> num_classes (1x1 conv, no activation)

    Args:
        num_classes: number of output channels of the final 1x1 convolution.
    """

    def __init__(self, num_classes=59):
        super(UNet, self).__init__()

        # Four encoders; each one returns (features before pooling, pooled features).
        self.down1 = Encoder(num_channels=3, num_filters=64)
        self.down2 = Encoder(num_channels=64, num_filters=128)
        self.down3 = Encoder(num_channels=128, num_filters=256)
        self.down4 = Encoder(num_channels=256, num_filters=512)

        # The original paper uses 3x3 convs (padding=1, stride=1) here;
        # this implementation uses 1x1 convs instead.
        self.midconv1 = Conv2D(num_channels=512, num_filters=1024, filter_size=1, padding=0, stride=1)
        self.bn1 = BatchNorm(num_channels=1024, act='relu')
        self.midconv2 = Conv2D(num_channels=1024, num_filters=1024, filter_size=1, padding=0, stride=1)
        self.bn2 = BatchNorm(num_channels=1024, act='relu')

        # Four decoders, numbered in execution order (up1 is the deepest).
        self.up1 = Decoder(num_channels=1024, num_filters=512)
        self.up2 = Decoder(num_channels=512, num_filters=256)
        self.up3 = Decoder(num_channels=256, num_filters=128)
        self.up4 = Decoder(num_channels=128, num_filters=64)

        # last_conv: channels 64 -> num_classes
        self.last_conv = Conv2D(num_channels=64, num_filters=num_classes, filter_size=1)

    def forward(self, inputs):
        """Run the network on ``inputs`` and return the output of the 1x1 head.

        ``inputs`` must have 3 channels (see ``down1``); the Decoder indexes
        shape[2]/shape[3] as H/W, i.e. NCHW layout. In the sample run a
        [1, 3, 123, 123] input yields a [1, 59, 123, 123] output. Shapes are
        printed at every stage as a debugging aid.
        """
        # encoder layer: x1..x4 are the pre-pool skip features, x is the pooled map
        print('encoder layer:')
        x1, x = self.down1(inputs)
        print('input_pred:',x1.shape, 'x_pooled：', x.shape)
        x2, x = self.down2(x)
        print('input_pred:',x2.shape, 'x_pooled：', x.shape)
        x3, x = self.down3(x)
        print('input_pred:',x3.shape, 'x_pooled：', x.shape)
        x4, x = self.down4(x)
        print('input_pred:',x4.shape, 'x_pooled：', x.shape)

        # middle layer
        x = self.midconv1(x)
        x = self.bn1(x)
        x = self.midconv2(x)
        x = self.bn2(x)

        # decoder layer: deepest skip feature first
        print('decoder layer:')
        x = self.up1(x4, x)
        print('up1_input_pred:',x4.shape, 'up1：', x.shape)
        x = self.up2(x3, x)
        print('up2_input_pred:',x3.shape, 'up2：', x.shape)
        x = self.up3(x2, x)
        print('up3_input_pred:',x2.shape, 'up3：', x.shape)
        x = self.up4(x1, x)
        print('up4_input_pred:',x1.shape, 'up4：', x.shape)

        x = self.last_conv(x)
        print('out_put:', x.shape)

        return x

def main():
    """Smoke-test UNet: push one random 1x3x123x123 float32 batch through it on CPU.

    The odd spatial size (123) exercises the ceil-mode pooling and the
    size-alignment step in the decoder. The per-stage shapes are printed by
    ``UNet.forward``; the converted output itself is not used further.
    """
    with fluid.dygraph.guard(fluid.CPUPlace()):
        model = UNet(num_classes=59)
        x_data = np.random.rand(1, 3, 123, 123).astype(np.float32)
        x_data = to_variable(x_data)
        output = model(x_data)
        output = output.numpy()

# Run the smoke test only when executed as a script, not on import.
if __name__ == "__main__":
    main()
结果：
~$ python ./work/U-Net.py
encoder layer:
input_pred: [1, 64, 123, 123] x_pooled： [1, 64, 62, 62]
input_pred: [1, 128, 62, 62] x_pooled： [1, 128, 31, 31]
input_pred: [1, 256, 31, 31] x_pooled： [1, 256, 16, 16]
input_pred: [1, 512, 16, 16] x_pooled： [1, 512, 8, 8]
decoder layer:
up1_input_pred: [1, 512, 16, 16] up1： [1, 512, 16, 16]
up2_input_pred: [1, 256, 31, 31] up2： [1, 256, 31, 31]
up3_input_pred: [1, 128, 62, 62] up3： [1, 128, 62, 62]
up4_input_pred: [1, 64, 123, 123] up4： [1, 64, 123, 123]
out_put: [1, 59, 123, 123]