# FCN_1 — Fully Convolutional Network (FCN-8s style) semantic segmentation
# trained on Pascal VOC2012 with a ResNet-34 backbone.
import os
import random
import numpy as np
import torch
from torch.utils.data import Dataset
import cv2
from torch.utils.data import DataLoader
import  torch.nn as nn
from torch.autograd import Variable  # 模型输入的类型为 Variable
import torch
import  matplotlib.pyplot as plt
from torchvision import transforms as tfs  # 先引入对数据预处理的库
from PIL import Image  # Python的图像库PIL可以处理图像,其中,他的一个类Image提供了open()、save()、convert()、show()来处理图像
from torchvision import models
import time
import torch.nn.functional as F



voc_root="../data/VOCdevkit/VOC2012/"  # root directory of the VOC2012 dataset

# The 21 Pascal VOC categories (index 0 is background).
classes = ['background','aeroplane','bicycle','bird','boat',
           'bottle','bus','car','cat','chair','cow','diningtable',
           'dog','horse','motorbike','person','potted plant',
           'sheep','sofa','train','tv/monitor']

# RGB colour used for each class in the SegmentationClass PNGs,
# in the same order as `classes`.
colormap = [[0,0,0],[128,0,0],[0,128,0], [128,128,0], [0,0,128],
            [128,0,128],[0,128,128],[128,128,128],[64,0,0],[192,0,0],
            [64,128,0],[192,128,0],[64,0,128],[192,0,128],
            [64,128,128],[192,128,128],[0,64,0],[128,64,0],
            [0,192,0],[128,192,0],[0,64,128]]

# Lookup table of length 256**3: packed RGB key (r*256 + g)*256 + b -> class index.
# Entries not matching a colormap colour stay 0 (background).
cm2lbl=np.zeros(256**3)
for i,cm in enumerate(colormap):  # for each class colour, record its index under the packed key
    cm2lbl[(cm[0]*256+cm[1])*256+cm[2]]=i  # one integer key per colour makes label lookup a single indexing op

def image2label(im):
    """Convert an RGB label image into a 2-D array of class indices.

    Each pixel's (R, G, B) triple is packed into a single integer key and
    looked up in the module-level ``cm2lbl`` table built from ``colormap``.
    Returns an int64 array of shape (H, W).
    """
    rgb = np.array(im, dtype='int32')  # (H, W, 3) colour image
    # Pack the three channels into one integer, mirroring how cm2lbl was built.
    keys = (rgb[..., 0] * 256 + rgb[..., 1]) * 256 + rgb[..., 2]
    return np.array(cm2lbl[keys], dtype='int64')

'''
label_im = Image.open('../data/VOCdevkit/VOC2012/SegmentationClass/2007_000033.png').convert('RGB') # 可能原来的像素是BGR的
label_im.show()  # 显示一张PNG形式的图片
label = image2label(label_im)  # 通过 image2labe()函数将 2007_000033.png 进行转化
# print(label.shape)  # 原来图片是366 * 500 * 3的,现在是366*500
# print(label[150:160, 240:250])

'''

def read_images(root=voc_root, train=True):
    """Collect image/label file paths for the VOC train or val split.

    Parameters
    ----------
    root : str
        Root directory of the VOC2012 dataset.
    train : bool
        If True read ``train.txt``, otherwise ``val.txt``.

    Returns
    -------
    (list[str], list[str])
        Paths of the JPEG images and of their matching PNG label maps,
        in the order listed in the split file.
    """
    split = 'train.txt' if train else 'val.txt'
    # os.path.join instead of string concatenation (the original produced
    # a doubled separator when `root` already ended with '/').
    txt_filename = os.path.join(root, 'ImageSets', 'Segmentation', split)
    with open(txt_filename, 'r') as f:  # closed automatically on exit
        images = f.read().split()  # one image id per whitespace-separated token
    data = [os.path.join(root, 'JPEGImages', i + '.jpg') for i in images]
    label = [os.path.join(root, 'SegmentationClass', i + '.png') for i in images]
    return data, label


def rand_crop(data, label, height, width):
    """Crop image and label to ``height`` x ``width`` at one random offset.

    Both arrays are sliced with the same top-left corner so image pixels and
    label pixels stay aligned. Assumes the arrays are at least as large as
    the requested crop.
    """
    h, w, _ = data.shape  # each image may have its own size
    top = random.randint(0, h - height)    # random vertical offset (inclusive)
    left = random.randint(0, w - width)    # random horizontal offset (inclusive)
    rows = slice(top, top + height)
    cols = slice(left, left + width)
    return data[rows, cols], label[rows, cols]


def img_transforms(im, label, crop_size):
    """Randomly crop a sample, then tensorize/normalise it for the network.

    Returns (image tensor normalised with ImageNet statistics,
    int64 label tensor of per-pixel class indices).
    """
    im, label = rand_crop(im, label, *crop_size)  # shared random crop window

    normalize = tfs.Compose([
        tfs.ToTensor(),  # HWC uint8 -> CHW float tensor scaled to [0, 1]
        tfs.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # mean, std
    ])
    image_tensor = normalize(im)

    # Colour label map -> class-index array -> Tensor.
    label_tensor = torch.from_numpy(image2label(label))
    return image_tensor, label_tensor


class VOCSegDataset(Dataset):  # torch.utils.data.Dataset subclass
    """VOC2012 semantic-segmentation dataset (image + pixel-label pairs).

    Files smaller than ``crop_size`` are filtered out so that the random
    crop performed inside ``transforms`` is always possible.
    """

    def __init__(self, train, crop_size, transforms):
        self.crop_size = crop_size      # (height, width) of the training crop
        self.transforms = transforms    # callable(img, label, crop_size) -> (Tensor, Tensor)
        data_list, label_list = read_images(train=train)  # paths for the chosen split

        self.data_list = self._filter(data_list)
        self.label_list = self._filter(label_list)

        print('训练集和测试集: ' + str(len(self.data_list)))

    def _filter(self, images):
        """Keep only files whose size covers the crop size.

        Fix: the original called ``Image.open`` twice per file (once per
        dimension check); open each file a single time instead.
        """
        kept = []
        for path in images:
            width, height = Image.open(path).size  # PIL .size is (width, height)
            if height >= self.crop_size[0] and width >= self.crop_size[1]:
                kept.append(path)
        return kept

    def __getitem__(self, idx):
        """Load, convert to RGB, and transform the idx-th sample."""
        img = cv2.imread(self.data_list[idx])
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)    # OpenCV loads BGR
        label = cv2.imread(self.label_list[idx])
        label = cv2.cvtColor(label, cv2.COLOR_BGR2RGB)
        return self.transforms(img, label, self.crop_size)

    def __len__(self):
        """Number of samples remaining after size filtering."""
        return len(self.data_list)



################################################################
input_shape = (320, 480)  # (height, width) crop fed to the network

voc_train = VOCSegDataset(True, input_shape, img_transforms)   # training images + labels
voc_test = VOCSegDataset(False, input_shape, img_transforms)   # validation images + labels

# (Removed a dead bare expression `voc_test.data_list` that had no effect.)

train_data = DataLoader(voc_train, batch_size=6, shuffle=True)  # shuffled training batches
valid_data = DataLoader(voc_test, batch_size=6)                 # validation batches

#######################################################################


def bilinear_kernel(in_channels, out_channels, kernel_size):
    """Build a ConvTranspose2d weight that performs bilinear upsampling.

    Returns a float32 tensor of shape
    (in_channels, out_channels, kernel_size, kernel_size) whose diagonal
    (channel i -> channel i) entries hold a 2-D bilinear interpolation
    filter; all cross-channel entries are zero.
    """
    factor = (kernel_size + 1) // 2
    if kernel_size % 2 == 1:
        center = factor - 1      # odd kernel: peak on a sample position
    else:
        center = factor - 0.5    # even kernel: peak between two samples
    rows, cols = np.ogrid[:kernel_size, :kernel_size]
    bilinear = ((1 - abs(rows - center) / factor)
                * (1 - abs(cols - center) / factor))
    weight = np.zeros((in_channels, out_channels, kernel_size, kernel_size),
                      dtype='float32')
    # Place the filter on the channel diagonal only (assumes in == out).
    weight[range(in_channels), range(out_channels), :, :] = bilinear
    return torch.from_numpy(weight)



# Load an ImageNet-pretrained ResNet-34 to use as the FCN backbone.
# NOTE(review): downloads weights on first use; the `pretrained=` keyword is
# deprecated in newer torchvision (replaced by `weights=`) — confirm version.
pretrained_net=models.resnet34(pretrained=True)  # ResNet-34 with pretrained weights



class fcn(nn.Module):
    """FCN-8s-style segmentation network on a ResNet-34 backbone.

    Per-class scores are computed at 1/8, 1/16 and 1/32 resolution, fused
    top-down via bilinear-initialised transposed convolutions, and finally
    upsampled 8x back to the input resolution.
    """

    def __init__(self, num_classes):
        super(fcn, self).__init__()

        # Split the backbone's top-level children into three stages.
        # stage1: everything up to the -4th child -> coarsest shared features;
        # the * unpacks the module list into nn.Sequential.
        self.stage1 = nn.Sequential(*list(pretrained_net.children())[:-4])
        self.stage2 = list(pretrained_net.children())[-4]  # next residual stage
        self.stage3 = list(pretrained_net.children())[-3]  # deepest residual stage

        # 1x1 convolutions mapping feature channels to per-class score maps.
        self.scores1 = nn.Conv2d(512, num_classes, 1)  # on stage3 output (512 ch)
        self.scores2 = nn.Conv2d(256, num_classes, 1)  # on stage2 output (256 ch)
        self.scores3 = nn.Conv2d(128, num_classes, 1)  # on stage1 output (128 ch)

        # Transposed convolutions initialised with fixed bilinear kernels.
        self.upsample_8x = nn.ConvTranspose2d(num_classes, num_classes, 16, 8, 4, bias=False)
        self.upsample_8x.weight.data = bilinear_kernel(num_classes, num_classes, 16)

        self.upsample_4x = nn.ConvTranspose2d(num_classes, num_classes, 4, 2, 1, bias=False)
        self.upsample_4x.weight.data = bilinear_kernel(num_classes, num_classes, 4)

        self.upsample_2x = nn.ConvTranspose2d(num_classes, num_classes, 4, 2, 1, bias=False)
        self.upsample_2x.weight.data = bilinear_kernel(num_classes, num_classes, 4)

    def forward(self, x):
        x = self.stage1(x)
        s1 = x  # 1/8 resolution features

        x = self.stage2(x)
        s2 = x  # 1/16 resolution features

        x = self.stage3(x)
        s3 = x  # 1/32 resolution features

        s3 = self.scores1(s3)
        s3 = self.upsample_2x(s3)   # 1/32 -> 1/16
        s2 = self.scores2(s2)
        s2 = s2 + s3                # fuse scores at 1/16

        s1 = self.scores3(s1)
        s2 = self.upsample_4x(s2)   # 1/16 -> 1/8
        s = s1 + s2                 # fuse scores at 1/8

        # BUG FIX: the original upsampled `s2` here, silently discarding the
        # s1 skip connection that was just fused into `s`.
        s = self.upsample_8x(s)     # 1/8 -> full resolution
        return s


# 定义一些语义分割常用的指标,overal accuracy,mean IU
def _fast_hist(label_true, label_pred, n_class):
    mask = (label_true >= 0) & (label_true < n_class)
    hist = np.bincount(
        n_class * label_true[mask].astype(int) +
        label_pred[mask], minlength=n_class ** 2).reshape(n_class, n_class)
    return hist


def label_accuracy_score(label_trues, label_preds, n_class):
    """Returns accuracy score evaluation result.
      - overall accuracy
      - mean accuracy
      - mean IU
      - fwavacc
    """
    # Sum one confusion matrix per (ground-truth, prediction) pair.
    hist = np.zeros((n_class, n_class))
    for gt, pred in zip(label_trues, label_preds):
        hist += _fast_hist(gt.flatten(), pred.flatten(), n_class)

    diag = np.diag(hist)
    acc = diag.sum() / hist.sum()                       # overall pixel accuracy
    acc_cls = np.nanmean(diag / hist.sum(axis=1))       # mean per-class accuracy (NaN classes skipped)
    iu = diag / (hist.sum(axis=1) + hist.sum(axis=0) - diag)  # per-class intersection / union
    mean_iu = np.nanmean(iu)
    freq = hist.sum(axis=1) / hist.sum()                # class pixel frequencies
    fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()     # frequency-weighted IU
    return acc, acc_cls, mean_iu, fwavacc


num_classes=len(classes) # num_classes = 21 (20 VOC classes + background)
net=fcn(num_classes)

criterion = nn.NLLLoss()  # expects log-probabilities; paired with log_softmax in the loop
optimizer = torch.optim.SGD(net.parameters(), lr=1e-2, weight_decay=1e-4)


# Train for 20 epochs, evaluating on the validation split after each epoch.
for e in range(20):
    print(e)
    train_loss = 0
    train_acc = 0

    train_acc_cls = 0
    train_mean_iu = 0
    train_fwavacc = 0

    prev_time = time.time()  # epoch start time (recorded but not printed below)

    net = net.train()  # switch to training mode (affects BatchNorm/Dropout)
    net = net.cuda()   # run the model on the GPU

    for data in train_data:
        im = Variable(data[0].cuda())
        label = Variable(data[1].cuda())

        # forward: raw scores -> log-probabilities for NLLLoss
        out = net(im)
        out = F.log_softmax(out, dim=1)  # (b, n, h, w)

        loss = criterion(out, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.data
        label_pred = out.max(dim=1)[1].data.cpu().numpy()  # per-pixel argmax
        label_true = label.data.cpu().numpy()

        # accumulate per-image metrics over the batch
        for lbt, lbp in zip(label_true, label_pred):
            acc, acc_cls, mean_iu, fwavacc = label_accuracy_score(lbt, lbp, num_classes)
            train_acc += acc
            train_acc_cls += acc_cls
            train_mean_iu += mean_iu
            train_fwavacc += fwavacc

    # ---- validation ----
    net = net.eval()  # switch to evaluation mode
    eval_loss = 0
    eval_acc = 0

    eval_acc_cls = 0
    eval_mean_iu = 0
    eval_fwavacc = 0

    for data in valid_data:
        # BUG FIX: the original only created the input Variables inside
        # `torch.no_grad()` and ran the forward pass outside it, so the
        # validation forward still built an autograd graph; run the whole
        # forward + loss without gradient tracking.
        with torch.no_grad():
            im = Variable(data[0].cuda())
            label = Variable(data[1].cuda())
            out = net(im)
            out = F.log_softmax(out, dim=1)
            loss = criterion(out, label)

        eval_loss += loss.data

        label_pred = out.max(dim=1)[1].data.cpu().numpy()
        label_true = label.data.cpu().numpy()
        for lbt, lbp in zip(label_true, label_pred):
            acc, acc_cls, mean_iu, fwavacc = label_accuracy_score(lbt, lbp, num_classes)
            eval_acc += acc
            eval_acc_cls += acc_cls
            eval_mean_iu += mean_iu
            eval_fwavacc += fwavacc

    cur_time = time.time()

    epoch_str = ('Epoch: {}, Train Loss: {:.5f}, Train Acc: {:.5f}, Train Mean IU: {:.5f}, \
Valid Loss: {:.5f}, Valid Acc: {:.5f}, Valid Mean IU: {:.5f} '.format(
        e, train_loss / len(train_data), train_acc / len(voc_train), train_mean_iu / len(voc_train),
           eval_loss / len(valid_data), eval_acc / len(voc_test), eval_mean_iu / len(voc_test)))

    print(epoch_str)

data,label=read_images() # re-read the training split's image/label paths


# uint8 lookup table: class index -> RGB colour, for visualising label maps
cm = np.array(colormap).astype('uint8')

def predict(im, label): # run inference on one sample and colourise the result
    """Predict the segmentation of one image and colourise both outputs.

    im: (C, H, W) normalised image tensor; label: (H, W) class-index tensor.
    Returns (pred_rgb, label_rgb) as uint8 colour images via the `cm` table.
    """
    im = Variable(im.unsqueeze(0)).cuda()  # add batch dimension, move to GPU
    out = net(im)
    pred = out.max(1)[1].squeeze().cpu().data.numpy()  # per-pixel argmax class
    pred = cm[pred]  # class indices -> RGB colours
    return pred, cm[label.numpy()]



# Visualise the first 12 validation samples:
# column 0 = original image, column 1 = ground truth, column 2 = prediction.
_, figs = plt.subplots(12, 3, figsize=(12, 10))
for i in range(12):
    print(i)
    test_data, test_label = voc_test[i]

    # test_data: (3, 320, 480) image tensor; test_label: (320, 480) class indices

    pred, label = predict(test_data, test_label)
    figs[i, 0].imshow(Image.open(voc_test.data_list[i]))  # original JPEG
    figs[i, 0].axes.get_xaxis().set_visible(False)
    figs[i, 0].axes.get_yaxis().set_visible(False)

    figs[i, 1].imshow(label)  # ground-truth colour map
    figs[i, 1].axes.get_xaxis().set_visible(False)
    figs[i, 1].axes.get_yaxis().set_visible(False)

    figs[i, 2].imshow(pred)  # predicted colour map
    figs[i, 2].axes.get_xaxis().set_visible(False)
    figs[i, 2].axes.get_yaxis().set_visible(False)
plt.show()
print("over")


# The original article showed the results after 10 training epochs as a
# screenshot here; that image and the surrounding blog-page boilerplate
# (vote/donation widgets) have been removed from this source file.