Reproducing FCN with VGG16 as the Backbone [Network Structure Code]

This post walks through the structures of the three FCN (Fully Convolutional Network) variants built on a VGG backbone (FCN32s, FCN16s, and FCN8s), focusing on how semantic segmentation is achieved by removing the fully connected layers and upsampling the feature maps.

A while ago, for a lab training session, I revisited FCN, the pioneering work in semantic segmentation. I am sharing the code here for anyone who finds it useful.

The structure diagram draws the three FCN variants together. In essence, the fully connected layers of VGG are removed, and the five feature maps of different scales produced by its convolutional stages are upsampled at different rates and fused.
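As a rough orientation, here is a minimal sketch (assuming a 224x224 input, as in the test at the end of this post) of the five spatial scales the backbone produces; each max-pool stage halves the resolution:

```python
# Spatial sizes of pool1..pool5 for a 224x224 input: each stage halves the resolution
sizes = [224 // 2 ** k for k in range(1, 6)]
print(sizes)  # [112, 56, 28, 14, 7]
```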

# Backbone: the VGG16 convolutional stages (fully connected layers removed)

import torch.nn as nn

class VGG(nn.Module):
    def __init__(self):
        super(VGG, self).__init__()

        self.layer1=nn.Sequential(
            nn.Conv2d(3,64,3,1,padding=1),
            nn.ReLU(True),
            nn.Conv2d(64,64,3,1,padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(2,2)
        )


        self.layer2 = nn.Sequential(
            nn.Conv2d(64,128,3,1,padding=1),
            nn.ReLU(True),
            nn.Conv2d(128,128,3,1,padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2)
        )


        self.layer3 = nn.Sequential(
            nn.Conv2d(128,256,3,1,padding=1),
            nn.ReLU(True),
            nn.Conv2d(256,256,3,1,padding=1),
            nn.ReLU(True),
            nn.Conv2d(256, 256, 3, 1,padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2)
        )

        self.layer4 = nn.Sequential(
            nn.Conv2d(256,512,3,1,padding=1),
            nn.ReLU(True),
            nn.Conv2d(512,512,3,1,padding=1),
            nn.ReLU(True),
            nn.Conv2d(512, 512, 3, 1,padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2)
        )

        self.layer5 = nn.Sequential(
            nn.Conv2d(512, 512, 3, 1,padding=1),
            nn.ReLU(True),
            nn.Conv2d(512, 512, 3, 1,padding=1),
            nn.ReLU(True),
            nn.Conv2d(512, 512, 3, 1,padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2)
        )


    # Forward pass: save the feature map from each stage and return them all,
    # so the transposed-convolution upsampling layers can use them later

    def forward(self,x):
        x = self.layer1(x)
        pool1 = x
        x = self.layer2(x)
        pool2 = x
        x = self.layer3(x)
        pool3 = x
        x = self.layer4(x)
        pool4 = x
        x = self.layer5(x)
        pool5 = x

        return pool1,pool2,pool3,pool4,pool5
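A quick sanity check of the backbone outputs (a sketch only, assuming a 1x3x224x224 input and the VGG class above) prints the five pooled feature-map shapes:

```python
import torch

backbone = VGG()
pools = backbone(torch.rand(1, 3, 224, 224))
for i, p in enumerate(pools, start=1):
    print(f"pool{i}: {tuple(p.shape)}")
# pool1: (1, 64, 112, 112)   pool2: (1, 128, 56, 56)   pool3: (1, 256, 28, 28)
# pool4: (1, 512, 14, 14)    pool5: (1, 512, 7, 7)
```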



# FCN upsampling part

import torch.nn as nn
from VGG import VGG    # the backbone defined above, saved as VGG.py
import torch

class FCN32s(nn.Module):
    def __init__(self,numclass):
        super(FCN32s,self).__init__()
        self.numclass = numclass
        self.features = VGG()

        self.TransLayer1 = nn.Sequential(
            nn.ConvTranspose2d(512,512,kernel_size=3,stride=2,padding=1,output_padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True)
        )

        self.TransLayer2 = nn.Sequential(
            nn.ConvTranspose2d(512,256,kernel_size=3,stride=2,padding=1,output_padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True)
        )

        self.TransLayer3 = nn.Sequential(
            nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True)
        )

        self.TransLayer4 = nn.Sequential(
            nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True)
        )

        self.TransLayer5 = nn.Sequential(
            nn.ConvTranspose2d(64,32,kernel_size=3,stride=2,padding=1,output_padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(True)
        )

        self.classifier = nn.Conv2d(32,numclass,kernel_size=1)

    def forward(self,x):

        x = self.features(x)

        # VGG's forward returns multiple values; Python packs them into a tuple
        # of five elements: pool1 through pool5
        # print(x[0].shape,x[1].shape,x[2].shape,x[3].shape,x[4].shape)

        y = self.TransLayer1(x[4])
        y = self.TransLayer2(y)
        y = self.TransLayer3(y)
        y = self.TransLayer4(y)
        y = self.TransLayer5(y)
        res = self.classifier(y)

        return res
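Each TransLayer doubles the spatial resolution: with kernel_size=3, stride=2, padding=1, output_padding=1, a transposed convolution maps an input of size H to (H - 1) * 2 - 2 + 3 + 1 = 2H. After five such layers, the 7x7 pool5 map is brought back to 224x224, a total factor of 32, hence the name FCN32s. A small check of this (a sketch, not part of the original post):

```python
import torch
import torch.nn as nn

up = nn.ConvTranspose2d(512, 512, kernel_size=3, stride=2, padding=1, output_padding=1)
print(up(torch.rand(1, 512, 7, 7)).shape)  # torch.Size([1, 512, 14, 14])
```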



class FCN8s(nn.Module):
    def __init__(self,numclass):
        super(FCN8s, self).__init__()
        self.numclass = numclass
        self.features = VGG()

        self.TransLayer1 = nn.Sequential(
            nn.ConvTranspose2d(512,512,kernel_size=3,stride=2,padding=1,output_padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True)
        )

        self.TransLayer2 = nn.Sequential(
            nn.ConvTranspose2d(512,256,kernel_size=3,stride=2,padding=1,output_padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True)
        )

        self.TransLayer3 = nn.Sequential(
            nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True)
        )

        self.TransLayer4 = nn.Sequential(
            nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True)
        )

        self.TransLayer5 = nn.Sequential(
            nn.ConvTranspose2d(64,32,kernel_size=3,stride=2,padding=1,output_padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(True)
        )

        self.classifier = nn.Conv2d(32,numclass,kernel_size=1)

    def forward(self,x):
        x = self.features(x)

        # FCN8s fuses two skip connections: pool4 after the first 2x upsampling
        # and pool3 after the second, then keeps upsampling back to input size
        y = self.TransLayer1(x[4]) + x[3]
        y = self.TransLayer2(y) + x[2]
        y = self.TransLayer3(y)
        y = self.TransLayer4(y)
        y = self.TransLayer5(y)
        res = self.classifier(y)

        return res
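The skip connections are plain element-wise additions, so the upsampled tensor must match the pooled feature map in both channel count and spatial size: TransLayer1 turns pool5 (512x7x7) into 512x14x14 to match pool4, and TransLayer2 turns that into 256x28x28 to match pool3. A minimal shape check (illustration only):

```python
import torch

upsampled_pool5 = torch.rand(1, 512, 14, 14)  # output of TransLayer1
pool4 = torch.rand(1, 512, 14, 14)            # x[3] from the backbone
print((upsampled_pool5 + pool4).shape)        # torch.Size([1, 512, 14, 14])
```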

class FCN16s(nn.Module):
    def __init__(self,numclass):
        super(FCN16s, self).__init__()
        self.numclass = numclass
        self.features = VGG()

        self.TransLayer1 = nn.Sequential(
            nn.ConvTranspose2d(512,512,kernel_size=3,stride=2,padding=1,output_padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True)
        )

        self.TransLayer2 = nn.Sequential(
            nn.ConvTranspose2d(512,256,kernel_size=3,stride=2,padding=1,output_padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True)
        )

        self.TransLayer3 = nn.Sequential(
            nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True)
        )

        self.TransLayer4 = nn.Sequential(
            nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True)
        )

        self.TransLayer5 = nn.Sequential(
            nn.ConvTranspose2d(64,32,kernel_size=3,stride=2,padding=1,output_padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(True)
        )

        self.classifier = nn.Conv2d(32,numclass,kernel_size=1)

    def forward(self,x):
        x = self.features(x)

        # FCN16s fuses a single skip connection: pool4 after the first 2x upsampling
        y = self.TransLayer1(x[4]) + x[3]
        y = self.TransLayer2(y)
        y = self.TransLayer3(y)
        y = self.TransLayer4(y)
        y = self.TransLayer5(y)
        res = self.classifier(y)

        return res

# Feed a random tensor through the network as a quick sanity test
x = torch.rand(1,3,224,224)
fcn = FCN32s(numclass=5)
out_put = fcn(x)
#out_put = fcn.forward(x)    # unnecessary: calling the module instance directly invokes __call__, which dispatches to forward()
print(out_put.shape)
print(out_put[0, :, :, :])
print(out_put)


# import torch.nn.functional as F    # needed for the softmax below
# probs = F.softmax(out_put, dim=1)
#
# # print the per-class probability tensor
# print(probs)
#
#
# import matplotlib.pyplot as plt
# import torch
#
# # take the prediction for the first sample
# original = x[0, :, :, :]
# predictions = out_put[0, :, :, :]
#
# # class index with the highest score at each pixel
# predicted_classes = torch.argmax(predictions, dim=0)
#
# # plot the prediction map
# plt.imshow(predicted_classes.numpy(), cmap='viridis')  # the colormap can be changed as needed
# plt.colorbar()
# plt.show()

FCN32s, FCN16s, and FCN8s share the same layer definitions; only their forward passes differ, fusing the feature maps of the corresponding stages according to the structure diagram.
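To compare the three variants side by side, a quick run like the following can be used (a sketch, assuming the three classes above live in the same file):

```python
import torch

x = torch.rand(1, 3, 224, 224)
for Model in (FCN32s, FCN16s, FCN8s):
    out = Model(numclass=5)(x)
    print(Model.__name__, tuple(out.shape))  # each prints (1, 5, 224, 224)
```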
