pytorch12-基于VGG19搭建FCN-8s语义分割网络（VOC2012数据集）

最新推荐文章于 2024-07-09 21:10:27 发布
一头秀发的假程序猿
最新推荐文章于 2024-07-09 21:10:27 发布
阅读量976
点赞数
分类专栏：Pytorch深度学习学习笔记　文章标签：python、深度学习、机器学习、语义分割
本文链接：https://blog.csdn.net/qq_36071362/article/details/117630656
版权
Pytorch深度学习学习笔记专栏收录该内容
12 篇文章 1 订阅
订阅专栏
import copy
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import torch
from torchsummary import summary
from torch import nn
import torch.nn.functional as F
import torch.utils.data as Data
import torch.optim as optim
from torchvision import transforms
from torchvision.models import vgg19
from torchvision.utils import make_grid

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Release cached, currently unused GPU memory before any work starts.
torch.cuda.empty_cache()

# Names of the 21 segmentation classes.
# NOTE(review): presumably listed in the same order as `colormap` below, so
# that a name's position is its class index -- confirm against the dataset.
classess = ['background', 'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
            'dog', 'horse', 'motorbike', 'person', 'potted plant',
            'sheep', 'sofa', 'train', 'tv/monitor']

# One [R, G, B] colour per class; the position of a colour in this list is the
# class index produced by image2label() and consumed by label2_image().
colormap = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128],
            [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], [192, 0, 0],
            [64, 128, 0], [192, 128, 0], [64, 0, 128], [192, 0, 128],
            [64, 128, 128], [192, 128, 128], [0, 64, 0], [128, 64, 0],
            [0, 192, 0], [128, 192, 0], [0, 64, 128]]


# Convert a colour-coded label image into a map of class indices.
def image2label(image, colormap):
    """Map every pixel of a colour-coded label image to its class index.

    Args:
        image: Label image convertible to an (H, W, C) integer array with
            C >= 3 (for example an RGB PIL image). Only the first three
            channels are read.
        colormap: Sequence of [R, G, B] colours; the position of a colour in
            the sequence is the class index assigned to matching pixels.

    Returns:
        A float64 array of shape (H, W) holding the class index of each
        pixel. Pixels whose colour is not in ``colormap`` are labelled 0.
    """
    pixels = np.array(image, dtype="int")
    # Pack each colour into one unique integer. The key must be
    # (R * 256 + G) * 256 + B: summing R * 256 + G * 256 + B makes different
    # colours such as [128, 0, 0] and [0, 128, 0] collide.
    codes = (pixels[:, :, 0] * 256 + pixels[:, :, 1]) * 256 + pixels[:, :, 2]
    labels = np.zeros(codes.shape)
    # One mask per class is far cheaper than building a 256**3-entry lookup
    # table on every call.
    for index, cm in enumerate(colormap):
        labels[codes == (cm[0] * 256 + cm[1]) * 256 + cm[2]] = index
    return labels


# Randomly crop an image and its label image with the same window.
def rand_crop(data, label, high, width):
    """Cut the same random ``width`` x ``high`` window out of two images.

    Args:
        data: Image object exposing ``size`` as (width, height) and
            ``crop(box)`` (for example a PIL image).
        label: Label image cropped with the same box as ``data``.
        high: Height of the crop window in pixels.
        width: Width of the crop window in pixels.

    Returns:
        Tuple ``(data_crop, label_crop)``.

    Raises:
        ValueError: If ``data`` is smaller than the requested window.
    """
    im_width, im_high = data.size
    if im_width < width or im_high < high:
        raise ValueError(
            "image of size %dx%d is smaller than the %dx%d crop window"
            % (im_width, im_high, width, high))
    # randint's upper bound is exclusive, so add 1: this makes the last valid
    # offset reachable and lets an image of exactly the crop size through.
    left = np.random.randint(0, im_width - width + 1)
    top = np.random.randint(0, im_high - high + 1)
    box = (left, top, left + width, top + high)
    return data.crop(box), label.crop(box)


# Preprocess one (image, label image) pair into training tensors.
def img_transforms(data, label, high, width, colormap):
    """Crop an image/label pair and convert both to tensors.

    The pair is cropped with rand_crop(); the image is then converted with
    ``ToTensor`` and normalised per channel, and the label image is turned
    into a class-index map with image2label().

    Args:
        data: Input image.
        label: Colour-coded label image matching ``data``.
        high: Crop height in pixels.
        width: Crop width in pixels.
        colormap: List of [R, G, B] colours, one per class.

    Returns:
        Tuple ``(image_tensor, label_tensor)``.
    """
    cropped_img, cropped_lbl = rand_crop(data, label, high, width)
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    img_tensor = normalize(to_tensor(cropped_img))
    lbl_tensor = torch.from_numpy(image2label(cropped_lbl, colormap))
    return img_tensor, lbl_tensor


# Build the image and label file paths listed in a VOC split file.
def read_image_path(root="K:/voc2012-dataset/VOCdevkit/VOC2012/ImageSets/Segmentation/train.txt",
                    voc_root=None):
    """Return the image and label paths for every name listed in ``root``.

    Args:
        root: Path of a text file with one image name (no extension) per
            line, located at ``<voc_root>/ImageSets/Segmentation/<file>``.
        voc_root: Directory containing ``JPEGImages`` and
            ``SegmentationClass``. When omitted it is taken to be three
            directory levels above ``root``, so the images always come from
            the same dataset as the list file.

    Returns:
        Tuple ``(data, label)`` of two equally long lists of path strings:
        ``<voc_root>/JPEGImages/<name>.jpg`` and
        ``<voc_root>/SegmentationClass/<name>.png``.
    """
    import os

    # atleast_1d: a list file with a single name is loaded as a 0-d array,
    # which can be neither measured with len() nor iterated.
    names = np.atleast_1d(np.loadtxt(root, dtype=str))
    if voc_root is None:
        voc_root = os.path.dirname(os.path.dirname(os.path.dirname(root)))
    data = ["%s/JPEGImages/%s.jpg" % (voc_root, fname) for fname in names]
    label = ["%s/SegmentationClass/%s.png" % (voc_root, fname) for fname in names]
    return data, label


# Dataset of (image, label) pairs read from a VOC segmentation split file.
class Mydataset(Data.Dataset):
    """Load, crop and preprocess image/label pairs for segmentation.

    Args:
        data_root: Path of the split file listing the image names; it is
            passed to read_image_path().
        high: Crop height in pixels.
        width: Crop width in pixels.
        imtransform: Callable ``(img, label, high, width, colormap)`` that
            returns the ``(img, label)`` pair handed to the data loader.
        colormap: List of [R, G, B] colours, one per class.
    """

    def __init__(self, data_root, high, width, imtransform, colormap):
        self.data_root = data_root
        self.high = high
        self.width = width
        self.imtransform = imtransform
        self.colormap = colormap
        data_list, label_list = read_image_path(root=data_root)
        # Filter the two lists together so that an image and its label can
        # never end up at different positions.
        kept = [(data, label) for data, label in zip(data_list, label_list)
                if self._large_enough(data) and self._large_enough(label)]
        self.data_list = [data for data, _ in kept]
        self.label_list = [label for _, label in kept]

    def _large_enough(self, path):
        """Return True if the image at ``path`` is larger than the crop size."""
        # The context manager closes the file; only the header is needed to
        # read the size.
        with Image.open(path) as im:
            im_width, im_high = im.size
        return im_high > self.high and im_width > self.width

    def _filter(self, images):
        """Keep only the paths of images strictly larger than the crop size."""
        # Uses self.high / self.width: the crop size must not be looked up in
        # module globals, which exist only when the file runs as a script.
        return [im for im in images if self._large_enough(im)]

    def __getitem__(self, idx):
        """Return the transformed ``(img, label)`` pair at position ``idx``."""
        img = Image.open(self.data_list[idx])
        # Converted to RGB so each pixel holds a colour to look up in the colormap.
        label = Image.open(self.label_list[idx]).convert('RGB')
        img, label = self.imtransform(img, label, self.high, self.width, self.colormap)
        return img, label

    def __len__(self):
        """Return the number of usable image/label pairs."""
        return len(self.data_list)


# Undo the per-channel normalisation and bring the data back into [0, 1].
def inv_normalize_image(data):
    """Reverse the mean/std normalisation of a channel-last image array.

    Args:
        data: Array whose last axis holds the three normalised RGB channels.

    Returns:
        Array of the same shape with every value clipped to [0, 1].
    """
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    restored = mean + std * data.astype('float32')
    return np.clip(restored, 0, 1)


# Turn a map of class indices back into a colour image.
def label2_image(prelabel, colormap):
    """Colour a single 2-D label map using ``colormap``.

    Args:
        prelabel: Array of shape (H, W) holding one class index per pixel.
        colormap: Sequence of [R, G, B] colours; entry ``i`` is the colour
            painted on pixels whose label equals ``i``.

    Returns:
        An int32 array of shape (H, W, 3). Pixels whose label has no entry
        in ``colormap`` stay black.
    """
    prelabel = np.asarray(prelabel)
    h, w = prelabel.shape
    image = np.zeros((h, w, 3), dtype="int32")
    for ii, color in enumerate(colormap):
        # Index with the boolean mask itself. Passing the tuple returned by
        # np.where() as a row index of a flattened image also selects row 0,
        # which repaints the top-left pixel for every class present.
        image[prelabel == ii] = color
    return image


class FCN8s(nn.Module):
    """FCN-8s style segmentation network on top of VGG19 features.

    The convolutional part of a pretrained VGG19 is used as the encoder.
    Five stride-2 transposed convolutions upsample the last pooled feature
    map again, and the outputs of the 4th and 3rd pooling layers are added
    in as skip connections after the first and second upsampling steps.

    Args:
        num_classes: Number of output channels (one score map per class).
    """

    def __init__(self, num_classes):
        super().__init__()
        self.num_classes = num_classes

        # Only the convolutional `features` part of the pretrained VGG19 is
        # kept; its adaptive pooling and linear layers are not used.
        self.base_model = vgg19(pretrained=True).features

        self.relu = nn.ReLU(inplace=True)

        def upsample(in_channels, out_channels):
            # 3x3 transposed convolution that exactly doubles height and width.
            return nn.ConvTranspose2d(in_channels, out_channels, kernel_size=3,
                                      stride=2, padding=1, output_padding=1)

        self.deconv1 = upsample(512, 512)
        self.bn1 = nn.BatchNorm2d(512)
        self.deconv2 = upsample(512, 256)
        self.bn2 = nn.BatchNorm2d(256)
        self.deconv3 = upsample(256, 128)
        self.bn3 = nn.BatchNorm2d(128)
        self.deconv4 = upsample(128, 64)
        self.bn4 = nn.BatchNorm2d(64)
        self.deconv5 = upsample(64, 32)
        self.bn5 = nn.BatchNorm2d(32)
        self.classifier = nn.ConvTranspose2d(32, num_classes, kernel_size=1)

        # Names of the VGG19 feature layers whose outputs are recorded in
        # forward(); per the original author these are the max-pool layers.
        self.layers = {"4": "max_pool_1", "9": "maxpool_2",
                       "18": "maxpool_3", "27": "maxpool_4",
                       "36": "maxpool_5"}

    def forward(self, x):
        """Return the per-class score maps for the input batch ``x``."""
        taps = {}
        feat = x
        # Run the encoder layer by layer and remember the tapped outputs.
        for layer_name, layer in self.base_model.named_children():
            feat = layer(feat)
            tap_name = self.layers.get(layer_name)
            if tap_name is not None:
                taps[tap_name] = feat
        pool5 = taps["maxpool_5"]
        pool4 = taps["maxpool_4"]
        pool3 = taps["maxpool_3"]

        # Upsample step by step, fusing the two skip connections on the way.
        up = self.relu(self.deconv1(pool5))
        up = self.bn1(up + pool4)
        up = self.relu(self.deconv2(up))
        up = self.bn2(up + pool3)
        for deconv, bn in ((self.deconv3, self.bn3),
                           (self.deconv4, self.bn4),
                           (self.deconv5, self.bn5)):
            up = bn(self.relu(deconv(up)))
        return self.classifier(up)


def train_model(model, criterion, optimizer, traindataloader, valdataloader, num_epochs=25):
    """Train ``model`` and keep the weights with the lowest validation loss.

    Every epoch runs one pass over ``traindataloader`` with gradient
    updates, then one pass over ``valdataloader`` without gradients. The
    model output is passed through ``log_softmax`` over dim 1 before being
    given to ``criterion``, so ``criterion`` must expect log-probabilities
    (for example ``nn.NLLLoss``).

    Args:
        model: Network to train; batches are moved to the device its
            parameters live on.
        criterion: Loss called as ``criterion(log_probs, target)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        traindataloader: Iterable of ``(inputs, targets)`` training batches.
        valdataloader: Iterable of ``(inputs, targets)`` validation batches.
        num_epochs: Number of epochs to run.

    Returns:
        Tuple ``(model, train_process)``: the model with the best weights
        loaded, and a DataFrame with the columns ``epoch``,
        ``train_loss_all`` and ``val_loss_all``.
    """
    # Take the device from the model itself so the function does not depend
    # on a module-level global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    since = time.time()
    best_models_wts = copy.deepcopy(model.state_dict())
    bestloss = 1e10
    train_loss_all = []
    val_loss_all = []
    for epoch in range(num_epochs):
        print('Epoch{}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        train_loss, train_num = 0.0, 0
        val_loss, val_num = 0.0, 0

        # Training phase.
        model.train()
        for b_x, b_y in traindataloader:
            b_x = b_x.float().to(run_device)
            b_y = b_y.long().to(run_device)
            optimizer.zero_grad()
            out = F.log_softmax(model(b_x), dim=1)
            loss = criterion(out, b_y)
            loss.backward()
            optimizer.step()
            # Weight the batch loss by the batch size to get a per-sample mean.
            train_loss += loss.item() * len(b_y)
            train_num += len(b_y)
        train_loss_all.append(train_loss / train_num)
        print('{} Train loss: {:.4f}'.format(epoch, train_loss_all[-1]))

        # Validation phase. no_grad() stops autograd from keeping activations
        # alive, so memory does not have to be reclaimed with empty_cache()
        # after every step.
        model.eval()
        with torch.no_grad():
            for b_x, b_y in valdataloader:
                b_x = b_x.float().to(run_device)
                b_y = b_y.long().to(run_device)
                out = F.log_softmax(model(b_x), dim=1)
                loss = criterion(out, b_y)
                val_loss += loss.item() * len(b_y)
                val_num += len(b_y)
        val_loss_all.append(val_loss / val_num)
        print('{} Val Loss:{:.4f}'.format(epoch, val_loss_all[-1]))

        # Remember the weights of the best epoch so far.
        if val_loss_all[-1] < bestloss:
            bestloss = val_loss_all[-1]
            best_models_wts = copy.deepcopy(model.state_dict())

        # Total time elapsed since training started.
        time_use = time.time() - since
        print("Train and Val complete in {:.0f}m {:.0f}s".format(time_use // 60, time_use % 60))

    train_process = pd.DataFrame(
        data={"epoch": range(num_epochs),
              "train_loss_all": train_loss_all,
              "val_loss_all": val_loss_all})
    # Hand back the model with the best weights loaded.
    model.load_state_dict(best_models_wts)
    return model, train_process


if __name__ == "__main__":
    # Crop size (height, width) applied to every training/validation sample.
    high, width = 320, 480
    voc_train = Mydataset("K:/voc2012-dataset/VOCdevkit/VOC2012/ImageSets/Segmentation/train.txt", high, width,
                          img_transforms, colormap)
    voc_val = Mydataset("K:/voc2012-dataset/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt", high, width,
                        img_transforms, colormap)

    # Data loaders; every batch holds `batch_size` images.
    batch_size = 2
    train_loader = Data.DataLoader(voc_train, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
    val_loader = Data.DataLoader(voc_val, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

    # Optional check: visualise one training batch before training.
    # for step, (b_x, b_y) in enumerate(train_loader):
    #     if step > 0:
    #         break
    #     # Print the shapes of the image batch and the label batch.
    #     print("b_x.shape:", b_x.shape)
    #     print("b_y.shape:", b_y.shape)
    #
    #     b_x_numpy = b_x.data.numpy()
    #     b_x_numpy = b_x_numpy.transpose(0,2,3,1)
    #     b_y_numpy = b_y.data.numpy()
    #     plt.figure(figsize=(16,6))
    #
    #     n_show = b_x_numpy.shape[0]
    #     for ii in range(n_show):
    #         plt.subplot(2,n_show,ii+1)
    #         plt.imshow(inv_normalize_image(b_x_numpy[ii]))
    #         plt.axis("off")
    #         plt.subplot(2,n_show,n_show+ii+1)
    #         plt.imshow(label2_image(b_y_numpy[ii],colormap))
    #         plt.axis("off")
    #     plt.subplots_adjust(wspace=0.1,hspace=0.1)
    #     plt.show()

    fcn8s = FCN8s(21).to(device)
    summary(fcn8s, input_size=(3, high, width))

    LR = 0.0003
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(fcn8s.parameters(), lr=LR, weight_decay=1e-4)
    # Train the model on the whole training set for `num_epochs` epochs.
    fcn8s, train_process = train_model(
        fcn8s, criterion, optimizer, train_loader,
        val_loader, num_epochs=20
    )
    torch.save(fcn8s, "fcn8s.pkl")
    plt.figure(figsize=(10, 6))
    plt.plot(train_process.epoch, train_process.train_loss_all,
             "ro-", label="Train loss")
    plt.plot(train_process.epoch, train_process.val_loss_all,
             "bs-", label="Val loss")
    plt.legend()
    plt.xlabel("epoch")
    plt.ylabel("Loss")
    plt.show()  # training and validation end here

    # Predict the first batch of the validation set and display it.
    fcn8s.eval()
    for step, (b_x, b_y) in enumerate(val_loader):
        if step > 0:
            break
        b_x = b_x.float().to(device)
        b_y = b_y.long().to(device)
        # Inference only: no gradients are needed.
        with torch.no_grad():
            out = F.log_softmax(fcn8s(b_x), dim=1)
        pre_lab = torch.argmax(out, 1)
        # Channel-last numpy copies for matplotlib.
        b_x_numpy = b_x.cpu().data.numpy()
        b_x_numpy = b_x_numpy.transpose(0, 2, 3, 1)
        b_y_numpy = b_y.cpu().data.numpy()
        pre_lab_numpy = pre_lab.cpu().data.numpy()
        # Grid of 3 rows x n_show columns: input images on the first row,
        # ground-truth labels on the second, predictions on the third.
        # Subplot indices run from 1 to 3 * n_show.
        n_show = b_x_numpy.shape[0]
        plt.figure(figsize=(16, 9))
        for ii in range(n_show):
            plt.subplot(3, n_show, ii + 1)
            plt.imshow(inv_normalize_image(b_x_numpy[ii]))
            plt.axis("off")
            plt.subplot(3, n_show, n_show + ii + 1)
            plt.imshow(label2_image(b_y_numpy[ii], colormap))
            plt.axis("off")
            plt.subplot(3, n_show, 2 * n_show + ii + 1)
            plt.imshow(label2_image(pre_lab_numpy[ii], colormap))
            plt.axis("off")
        plt.subplots_adjust(wspace=0.05, hspace=0.05)
        plt.show()
使用 4 GB 显存的 GPU 训练约需 140 分钟；详细注释和说明后续补充。
一头秀发的假程序猿
关注
0
点赞
踩
15

收藏

觉得还不错? 一键收藏
0
评论
pytorch12-基于VGG19搭建FCN-8s语义分割网络（VOC2012数据集）

import copy; import time; import numpy as np; import pandas as pd; import matplotlib.pyplot as plt; from PIL import Image; import torch; from torchsummary import summary; from torch import nn; import torch.nn.functional as F; import torch.utils.data as Data; import …
复制链接

扫一扫
专栏目录