FCN跳跃结构实现图像分割

1 BagData.py

import os
import cv2
import numpy as np
from torch.utils.data import Dataset


# Inverted one-hot encoding of an integer label map.
def onehot(n, data):
    """Encode an integer label map as an *inverted* one-hot volume.

    For each pixel with label k, channel k is 0 and every other channel
    is 1.  Note the inversion: downstream code recovers the label with
    argmin, not argmax.

    Args:
        n: number of classes.
        data: integer label array, e.g. shape (H, W) with values in [0, n).

    Returns:
        float32 array of shape data.shape + (n,), e.g. (224, 224, 2).
    """
    return (1.0 - np.eye(n)[data]).astype('float32')


# Dataset of bag photos and their binary segmentation masks.
class BagDataset(Dataset):
    """Pairs files from `<data_path>/images` with `<data_path>/masks`.

    Each item is (image, mask): image is a 224x224 BGR image (optionally
    transformed), mask is a float32 (2, 224, 224) inverted one-hot map.
    """

    def __init__(self, data_path, transform=None):
        """Index the image/mask file pairs under `data_path`.

        Args:
            data_path: directory containing `images/` and `masks/` subdirs.
            transform: optional callable applied to each loaded image.
        """
        # Bug fix: os.listdir returns names in arbitrary order, so the two
        # lists must be sorted or images and masks may be paired wrongly.
        self.imgs = sorted(os.listdir(data_path + '/images'))
        self.msks = sorted(os.listdir(data_path + '/masks'))

        # Every image must have exactly one mask.
        assert len(self.imgs) == len(self.msks), 'Number does not match'

        self.transform = transform

        # Pre-build (image_path, mask_path) pairs for O(1) indexing.
        self.imgs_and_msks = []
        for i in range(len(self.imgs)):
            self.imgs_and_msks.append(
                (data_path + '/images/' + self.imgs[i], data_path + '/masks/' + self.msks[i])
            )

    def __getitem__(self, item):
        """Load, resize and encode the pair at index `item`."""
        img_path, msk_path = self.imgs_and_msks[item]

        img = cv2.imread(img_path)
        img = cv2.resize(img, (224, 224))

        # Mask is read as grayscale; pixel values {0, 255} -> labels {0, 1}.
        msk = cv2.imread(msk_path, 0)
        msk = cv2.resize(msk, (224, 224))
        msk = msk / 255
        msk = msk.astype('uint8')
        msk = onehot(2, msk)          # (224, 224, 2), inverted one-hot
        msk = msk.transpose(2, 0, 1)  # -> (2, 224, 224), channels first

        if self.transform is not None:
            img = self.transform(img)

        return img, msk

    def __len__(self):
        return len(self.imgs)


if __name__ == '__main__':
    pass  # no standalone behavior; this module is imported by train.py

2 FCN.py

import torch
import torch.nn as nn
from torchvision import models
from torchvision.models.vgg import VGG

# Slice boundaries into each VGG `features` Sequential: each (begin, end)
# range covers one conv stage up to and including its max-pool, so walking
# the five ranges yields the five pooled feature maps (x1..x5).
ranges = {
    'vgg11': ((0, 3), (3, 6), (6, 11), (11, 16), (16, 21)),
    'vgg13': ((0, 5), (5, 10), (10, 15), (15, 20), (20, 25)),
    'vgg16': ((0, 5), (5, 10), (10, 17), (17, 24), (24, 31)),
    'vgg19': ((0, 5), (5, 10), (10, 19), (19, 28), (28, 37)),
}

# VGG layer configurations (torchvision convention): an int is the output
# channel count of a 3x3 convolution, 'M' marks a 2x2 stride-2 max-pool.
cfg = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


# Build the VGG `features` Sequential from a configuration list.
def make_layers(cfg, batch_norm=False):
    """Translate a VGG config list into an nn.Sequential feature extractor.

    Args:
        cfg: sequence of ints (conv output channels) and 'M' (max-pool).
        batch_norm: when True, insert BatchNorm2d after every convolution.

    Returns:
        nn.Sequential of Conv2d / [BatchNorm2d] / ReLU / MaxPool2d layers.
    """
    layers = []
    channels = 3  # RGB input
    for item in cfg:
        if item == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        layers.append(nn.Conv2d(channels, item, kernel_size=3, padding=1))
        if batch_norm:
            layers.append(nn.BatchNorm2d(item))
        layers.append(nn.ReLU(inplace=True))
        channels = item
    return nn.Sequential(*layers)


# VGG backbone that exposes the five pooled feature maps for FCN skip links.
class VGGNet(VGG):
    def __init__(self, pretrained=True, model='vgg16', remove_fc=True,
                 requires_grad=True, show_params=False):
        """Build a VGG feature extractor for FCN decoders.

        Args:
            pretrained: load ImageNet weights from torchvision when True.
            model: one of 'vgg11', 'vgg13', 'vgg16', 'vgg19'.
            remove_fc: drop the fully connected classifier head.
            requires_grad: when False, freeze all backbone parameters.
            show_params: print every parameter name and size.
        """
        # Build the parent VGG with the requested feature extractor.
        super().__init__(make_layers(cfg[model]))

        self.ranges = ranges[model]

        if pretrained:
            # Bug fix: look the factory up by name instead of exec() —
            # same effect without executing a formatted code string.
            self.load_state_dict(getattr(models, model)(pretrained=True).state_dict())

        if not requires_grad:
            # Freeze the backbone (self.parameters(), not super().parameters()).
            for param in self.parameters():
                param.requires_grad = False

        # Drop the fully connected head; only `features` is used by forward.
        if remove_fc:
            del self.classifier

        if show_params:
            for name, param in self.named_parameters():
                print(name, param.size())

    def forward(self, x):
        """Return {'x1'..'x5'}: the feature map after each pooling stage."""
        output = {}
        for idx, (begin, end) in enumerate(self.ranges):
            # Run the layers of one conv stage (through its max-pool).
            for layer in range(begin, end):
                x = self.features[layer](x)
            output["x%d" % (idx + 1)] = x

        return output


# FCN-32s: upsample the final VGG feature map 32x with no skip connections.
class FCN32s(nn.Module):
    def __init__(self, pretrained_net, n_class):
        """Args:
            pretrained_net: backbone whose output dict provides 'x5'.
            n_class: number of output classes (score map channels).
        """
        super().__init__()
        self.n_class = n_class
        self.pretrained_net = pretrained_net
        self.relu = nn.ReLU(inplace=True)

        # Five stride-2 transposed convs, each doubling the spatial size;
        # channel widths shrink 512 -> 512 -> 256 -> 128 -> 64 -> 32.
        widths = (512, 512, 256, 128, 64, 32)
        for i in range(1, 6):
            setattr(self, 'deconv%d' % i,
                    nn.ConvTranspose2d(widths[i - 1], widths[i], 3, 2, 1,
                                       output_padding=1, dilation=1))
            setattr(self, 'bn%d' % i, nn.BatchNorm2d(widths[i]))

        # 1x1 conv maps 32 channels to per-class scores.
        self.classifier = nn.Conv2d(32, n_class, kernel_size=1)

    def forward(self, x):
        """Return per-pixel class scores, (b, n_class, H, W)."""
        feat = self.pretrained_net(x)['x5']  # 1/32 resolution, e.g. (b, 512, 7, 7)

        feat = self.bn1(self.relu(self.deconv1(feat)))  # -> (b, 512, 14, 14)
        feat = self.bn2(self.relu(self.deconv2(feat)))  # -> (b, 256, 28, 28)
        feat = self.bn3(self.relu(self.deconv3(feat)))  # -> (b, 128, 56, 56)
        feat = self.bn4(self.relu(self.deconv4(feat)))  # -> (b, 64, 112, 112)
        feat = self.bn5(self.relu(self.deconv5(feat)))  # -> (b, 32, 224, 224)
        return self.classifier(feat)                    # -> (b, n_class, 224, 224)


# FCN-16s: one additive skip connection from pool4, then 16x upsampling.
class FCN16s(nn.Module):
    def __init__(self, pretrained_net, n_class):
        """Args:
            pretrained_net: backbone whose output dict provides 'x4' and 'x5'.
            n_class: number of output classes (score map channels).
        """
        super().__init__()
        self.n_class = n_class
        self.pretrained_net = pretrained_net
        self.relu = nn.ReLU(inplace=True)

        # Five stride-2 transposed convs, each doubling the spatial size.
        widths = (512, 512, 256, 128, 64, 32)
        for i in range(1, 6):
            setattr(self, 'deconv%d' % i,
                    nn.ConvTranspose2d(widths[i - 1], widths[i], 3, 2, 1, 1,
                                       dilation=1))
            setattr(self, 'bn%d' % i, nn.BatchNorm2d(widths[i]))

        self.classifier = nn.Conv2d(32, n_class, kernel_size=1)

    def forward(self, x):
        """Return per-pixel class scores, (b, n_class, H, W)."""
        maps = self.pretrained_net(x)

        # Upsample pool5 and fuse with pool4 by element-wise addition.
        feat = self.relu(self.deconv1(maps['x5']))
        feat = self.bn1(feat + maps['x4'])
        # Four more 2x upsampling stages back to input resolution.
        feat = self.bn2(self.relu(self.deconv2(feat)))
        feat = self.bn3(self.relu(self.deconv3(feat)))
        feat = self.bn4(self.relu(self.deconv4(feat)))
        feat = self.bn5(self.relu(self.deconv5(feat)))
        return self.classifier(feat)


# FCN-8s: additive skip connections from pool4 and pool3, then 8x upsampling.
class FCN8s(nn.Module):
    def __init__(self, pretrained_net, n_class):
        """Args:
            pretrained_net: backbone whose output dict provides 'x3'..'x5'.
            n_class: number of output classes (score map channels).
        """
        super().__init__()
        self.n_class = n_class
        self.pretrained_net = pretrained_net
        self.relu = nn.ReLU(inplace=True)

        # Five stride-2 transposed convs, each doubling the spatial size.
        widths = (512, 512, 256, 128, 64, 32)
        for i in range(1, 6):
            setattr(self, 'deconv%d' % i,
                    nn.ConvTranspose2d(widths[i - 1], widths[i], 3, 2, 1, 1,
                                       dilation=1))
            setattr(self, 'bn%d' % i, nn.BatchNorm2d(widths[i]))

        self.classifier = nn.Conv2d(32, n_class, kernel_size=1)

    def forward(self, x):
        """Return per-pixel class scores, (b, n_class, H, W)."""
        maps = self.pretrained_net(x)

        # Fuse pool5 with pool4, then with pool3, by element-wise addition.
        feat = self.relu(self.deconv1(maps['x5']))
        feat = self.bn1(feat + maps['x4'])
        feat = self.relu(self.deconv2(feat))
        feat = self.bn2(feat + maps['x3'])
        # Three more 2x upsampling stages back to input resolution.
        feat = self.bn3(self.relu(self.deconv3(feat)))
        feat = self.bn4(self.relu(self.deconv4(feat)))
        feat = self.bn5(self.relu(self.deconv5(feat)))
        return self.classifier(feat)


# FCNs: additive skip connections from all four earlier pooling stages.
class FCNs(nn.Module):
    def __init__(self, pretrained_net, n_class):
        """Args:
            pretrained_net: backbone whose output dict provides 'x1'..'x5'.
            n_class: number of segmentation classes.
        """
        super().__init__()
        self.n_class = n_class
        self.pretrained_net = pretrained_net
        self.relu = nn.ReLU(inplace=True)

        # Five stride-2 transposed convs, each doubling the spatial size;
        # channel widths shrink 512 -> 512 -> 256 -> 128 -> 64 -> 32.
        widths = (512, 512, 256, 128, 64, 32)
        for i in range(1, 6):
            setattr(self, 'deconv%d' % i,
                    nn.ConvTranspose2d(widths[i - 1], widths[i], 3, 2, 1, 1,
                                       dilation=1))
            setattr(self, 'bn%d' % i, nn.BatchNorm2d(widths[i]))

        self.classifier = nn.Conv2d(32, n_class, kernel_size=1)

    def forward(self, x):
        """Return per-pixel class scores, (b, n_class, H, W)."""
        maps = self.pretrained_net(x)
        # maps['x5'] (b,512,7,7) ... maps['x1'] (b,64,112,112) for 224 input.

        # Each stage: 2x upsample, then add the matching skip feature map.
        feat = self.relu(self.deconv1(maps['x5']))   # (b, 512, 14, 14)
        feat = self.bn1(feat + maps['x4'])
        feat = self.relu(self.deconv2(feat))         # (b, 256, 28, 28)
        feat = self.bn2(feat + maps['x3'])
        feat = self.relu(self.deconv3(feat))         # (b, 128, 56, 56)
        feat = self.bn3(feat + maps['x2'])
        feat = self.relu(self.deconv4(feat))         # (b, 64, 112, 112)
        feat = self.bn4(feat + maps['x1'])
        feat = self.bn5(self.relu(self.deconv5(feat)))  # (b, 32, 224, 224)
        return self.classifier(feat)                 # (b, n_class, 224, 224)


# FCN variant that fuses skip connections by channel concat + 3x3 conv
# instead of element-wise addition.
class MyFCNs(nn.Module):
    def __init__(self, pretrained_net, n_class):
        """Args:
            pretrained_net: backbone whose output dict provides 'x1'..'x5'.
            n_class: number of segmentation classes.
        """
        super().__init__()
        self.n_class = n_class
        self.pretrained_net = pretrained_net
        self.relu = nn.ReLU(inplace=True)

        # Five stride-2 transposed convs; the first four stages also get a
        # fuse conv that halves the concatenated channel count back down.
        widths = (512, 512, 256, 128, 64, 32)
        for i in range(1, 6):
            setattr(self, 'deconv%d' % i,
                    nn.ConvTranspose2d(widths[i - 1], widths[i], 3, 2, 1, 1,
                                       dilation=1))
            if i < 5:
                setattr(self, 'fuse%d' % i,
                        nn.Conv2d(widths[i] * 2, widths[i], 3, 1, 1))
            setattr(self, 'bn%d' % i, nn.BatchNorm2d(widths[i]))

        self.classifier = nn.Conv2d(32, n_class, kernel_size=1)

    def forward(self, x):
        """Return per-pixel class scores, (b, n_class, H, W)."""
        maps = self.pretrained_net(x)

        feat = maps['x5']  # deepest feature map, e.g. (b, 512, 7, 7)
        # Four upsample-and-fuse stages: concat with the matching skip map
        # along channels, then a 3x3 conv reduces the width back down.
        skips = (maps['x4'], maps['x3'], maps['x2'], maps['x1'])
        for i, skip in enumerate(skips, start=1):
            feat = self.relu(getattr(self, 'deconv%d' % i)(feat))
            feat = torch.cat((feat, skip), dim=1)
            feat = getattr(self, 'fuse%d' % i)(feat)
            feat = getattr(self, 'bn%d' % i)(feat)

        # Final 2x upsample to input resolution, then per-class scores.
        feat = self.bn5(self.relu(self.deconv5(feat)))  # (b, 32, 224, 224)
        return self.classifier(feat)                    # (b, n_class, 224, 224)



if __name__ == '__main__':
    # Smoke test: build the backbone and print its layers/parameter sizes.
    vgg = VGGNet(show_params=True)
    print(vgg)

3 train.py

import torch
import numpy as np
from torch import nn
from torch import optim
from datetime import datetime
from torchvision import transforms
from torch.utils.data import DataLoader
import matplotlib.pylab as plt

from BagData import BagDataset
from FCN import VGGNet, FCNs, FCN8s, FCN16s, FCN32s


torch.manual_seed(1)  # fixed seed for reproducible weight init / shuffling

# Hyperparameters
batch_size = 10  # samples per batch
Epochs = 20  # number of full passes over the training set
LR = 0.1  # learning rate
Momentum = 0.7  # SGD momentum
log_interval = 5  # print a log line every 5 batches
n_class = 2  # single-object segmentation: foreground / background only

# FCN variant to train (FCNs / FCN8s / FCN16s / FCN32s)
FCN = FCNs

# Dataset locations
train_path = '../BagData/train'
test_path = '../BagData/test'


# To tensor, then normalize with ImageNet channel statistics
transform1 = transforms.ToTensor()
transform2 = transforms.Normalize(
    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
)
transform = transforms.Compose([transform1, transform2])

# Training set
train_data = BagDataset(train_path, transform=transform)
train_dataloader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
# Test set
test_data = BagDataset(test_path, transform=transform)
test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True)

# Use the GPU when available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the model: VGG backbone with pretrained weights + FCN decoder
vgg = VGGNet(pretrained=True, requires_grad=True)
net = FCN(pretrained_net=vgg, n_class=n_class)
net = net.to(device)

# Binary cross-entropy loss (network output is passed through sigmoid first)
criterion = nn.BCELoss().to(device)
# Optimizer
optimizer = optim.SGD(net.parameters(), lr=LR, momentum=Momentum)


def train(epoch):
    """Run one training epoch, then evaluate on the full test set.

    Uses the module-level net / criterion / optimizer / dataloaders; shows
    the current sample, mask and prediction with matplotlib after every
    batch, and saves a whole-model checkpoint every 5 epochs.

    Args:
        epoch: 1-based epoch index (used for logging and checkpoint names).
    """
    prev_time = datetime.now()

    # ---- training pass ----
    net.train()  # ensure BatchNorm uses batch statistics while training
    train_loss = 0
    for batch_idx, (img, msk) in enumerate(train_dataloader):
        img = img.to(device)  # [b, 3, 224, 224]
        msk = msk.to(device)  # [b, 2, 224, 224], inverted one-hot

        output = net(img)
        output = torch.sigmoid(output)  # BCELoss expects probabilities
        loss = criterion(output, msk)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

        if batch_idx % log_interval == 0:
            print('Epoch:[{}/{}]\tStep:[{}/{}]\tLoss:{:.6f}'.format(
                epoch, Epochs, (batch_idx + 1) * len(img), len(train_dataloader.dataset), loss.item()
            ))

        # Visualize the first sample of the batch.  Labels are recovered
        # with argmin because the one-hot encoding is inverted (BagData.onehot).
        output_np = output.cpu().detach().numpy().copy()  # (b, 2, 224, 224)
        output_np = np.argmin(output_np, axis=1)  # (b, 224, 224)
        output_np = output_np[0, ...]

        img_np = img.cpu().data.numpy().copy()
        img_np = np.argmin(img_np, axis=1)
        img_np = img_np[0, ...]

        msk_np = msk.cpu().detach().numpy().copy()
        msk_np = np.argmin(msk_np, axis=1)
        msk_np = msk_np[0, ...]

        plt.subplot(1, 3, 1)
        plt.imshow(img_np)
        plt.subplot(1, 3, 2)
        plt.imshow(msk_np, 'gray')
        plt.subplot(1, 3, 3)
        plt.imshow(output_np, 'gray')
        plt.pause(1)

    # ---- evaluation pass ----
    # Bug fix: switch to eval mode so BatchNorm uses its running statistics
    # instead of per-batch statistics during evaluation.
    net.eval()
    test_loss = 0
    all_test_iter_loss = []
    with torch.no_grad():
        for batch_idx, (img, msk) in enumerate(test_dataloader):
            img = img.to(device)
            msk = msk.to(device)

            output = net(img)
            output = torch.sigmoid(output)
            loss = criterion(output, msk)
            # (removed a stray optimizer.zero_grad(): no gradients are
            # produced inside torch.no_grad(), so it had no effect)

            iter_loss = loss.item()
            all_test_iter_loss.append(iter_loss)
            test_loss += iter_loss

            output_np = output.cpu().detach().numpy().copy()  # (b, 2, 224, 224)
            output_np = np.argmin(output_np, axis=1)  # (b, 224, 224)
            output_np = output_np[0, ...]

            msk_np = msk.cpu().detach().numpy().copy()
            msk_np = np.argmin(msk_np, axis=1)
            msk_np = msk_np[0, ...]

            plt.subplot(1, 2, 1)
            plt.imshow(msk_np, 'gray')
            plt.subplot(1, 2, 2)
            plt.imshow(output_np, 'gray')
            plt.pause(1)
    net.train()  # restore training mode for the next epoch

    cur_time = datetime.now()
    h, remainder = divmod((cur_time - prev_time).seconds, 3600)
    m, s = divmod(remainder, 60)
    time_str = "%02d:%02d:%02d" % (h, m, s)

    print('Epoch: %d, epoch train loss: %.6f, epoch test loss: %.6f, time using: %s'
          % (epoch, train_loss/len(train_dataloader), test_loss/len(test_dataloader), time_str))

    # Checkpoint the whole model every 5 epochs.
    if np.mod(epoch, 5) == 0:
        torch.save(net, '../checkpoints/FCNs/net_{}.pth'.format(epoch))
        print('saving checkpoints/net_{}.pth'.format(epoch))


if __name__ == '__main__':
    # Train for `Epochs` epochs (epoch index is 1-based for logging).
    for epoch in range(1, Epochs+1):
        train(epoch)

4 predict.py

import cv2
import torch
import numpy as np
import matplotlib.pyplot as plt
from torchvision import transforms


img_path = '../pic/4.jpg'  # image to segment
net_path = '../checkpoints/FCN8s/net_20.pth'  # whole-model checkpoint saved by train.py

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Load the pickled model.
# NOTE(review): torch.load without map_location will fail on a CPU-only
# machine if the checkpoint was saved from GPU — confirm deployment target.
model = torch.load(net_path)
model = model.to(device)

# Same preprocessing as training: to tensor + ImageNet normalization.
transform1 = transforms.ToTensor()
transform2 = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
transform = transforms.Compose([transform1, transform2])


def predict():
    """Segment `img_path` with the loaded model; display and save the mask.

    The label map is recovered with argmin because the training masks use
    an inverted one-hot encoding (see BagData.onehot).
    """
    img_op = cv2.imread(img_path)
    img_op = cv2.resize(img_op, (224, 224))

    img = transform(img_op)
    img = img.to(device)    # (3, 224, 224)
    img = img.unsqueeze(0)  # (1, 3, 224, 224) — add the batch dimension

    # Bug fix: run inference in eval mode and without gradient tracking.
    model.eval()
    with torch.no_grad():
        output = model(img)
    output = torch.sigmoid(output)

    output_np = output.cpu().detach().numpy().copy()  # (1, 2, 224, 224)
    output_np = np.argmin(output_np, axis=1)  # (1, 224, 224)
    output_np = np.squeeze(output_np)  # (224, 224), labels {0, 1}
    # Bug fix: scale to {0, 255} and cast — cv2.imwrite cannot write the
    # int64 array that np.argmin produces.
    output_np = (output_np * 255).astype('uint8')

    plt.subplot(1, 2, 1)
    # NOTE(review): cv2 loads BGR, so plt shows swapped colors — confirm
    # whether a cvtColor to RGB is wanted for display.
    plt.imshow(img_op)
    plt.axis('off')
    plt.subplot(1, 2, 2)
    plt.imshow(output_np)
    plt.axis('off')
    plt.pause(3)

    cv2.imwrite('../results/pic4/FCN8s.jpg', output_np)
    print('result/pic4/FCN8s.jpg')


if __name__ == "__main__":
    predict()

 

 

 

  • 0
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值