import os
import random
import numpy as np
import torch
from torch.utils.data import Dataset
import cv2
from torch.utils.data import DataLoader
import torch.nn as nn
from torch.autograd import Variable # 模型输入的类型为 Variable
import torch
import matplotlib.pyplot as plt
from torchvision import transforms as tfs # 先引入对数据预处理的库
from PIL import Image # Python的图像库PIL可以处理图像,其中,他的一个类Image提供了open()、save()、convert()、show()来处理图像
from torchvision import models
import time
import torch.nn.functional as F
voc_root = "../data/VOCdevkit/VOC2012/"  # root directory of the VOC2012 dataset

# The 21 Pascal VOC classes; index 0 is the background class.
classes = ['background','aeroplane','bicycle','bird','boat',
           'bottle','bus','car','cat','chair','cow','diningtable',
           'dog','horse','motorbike','person','potted plant',
           'sheep','sofa','train','tv/monitor']

# One RGB colour per class, in the same order as `classes`.
colormap = [[0,0,0],[128,0,0],[0,128,0], [128,128,0], [0,0,128],
            [128,0,128],[0,128,128],[128,128,128],[64,0,0],[192,0,0],
            [64,128,0],[192,128,0],[64,0,128],[192,0,128],
            [64,128,128],[192,128,128],[0,64,0],[128,64,0],
            [0,192,0],[128,192,0],[0,64,128]]

# Lookup table from a packed 24-bit RGB value to a class index.
# There are 256**3 possible colours; colours not listed in `colormap`
# stay 0 and are therefore treated as background.
cm2lbl = np.zeros(256 ** 3)
for class_idx, rgb in enumerate(colormap):
    packed = (rgb[0] * 256 + rgb[1]) * 256 + rgb[2]
    cm2lbl[packed] = class_idx

def image2label(im):
    """Convert a colour-coded label image into a 2-D int64 array of class indices.

    `im` is anything np.array() accepts as an (H, W, 3) RGB image; the
    result has shape (H, W) with values in [0, len(classes)).
    """
    pixels = np.array(im, dtype='int32')  # (H, W, 3)
    # Pack each RGB triple into the same 24-bit key used to build cm2lbl.
    packed = (pixels[:, :, 0] * 256 + pixels[:, :, 1]) * 256 + pixels[:, :, 2]
    return np.array(cm2lbl[packed], dtype='int64')
'''
label_im = Image.open('../data/VOCdevkit/VOC2012/SegmentationClass/2007_000033.png').convert('RGB') # 可能原来的像素是BGR的
label_im.show() # 显示一张PNG形式的图片
label = image2label(label_im) # 通过 image2labe()函数将 2007_000033.png 进行转化
# print(label.shape) # 原来图片是366 * 500 * 3的,现在是366*500
# print(label[150:160, 240:250])
'''
def read_images(root=voc_root, train=True):
    """Return (image_paths, label_paths) for one VOC split.

    Reads ImageSets/Segmentation/{train,val}.txt under `root` (one image
    id per whitespace-separated token) and builds the full path of each
    JPEG image and of its PNG segmentation mask.

    root  : dataset root directory (defaults to the module-level voc_root)
    train : True -> train.txt split, False -> val.txt split
    """
    split = 'train.txt' if train else 'val.txt'
    # os.path.join instead of raw string concatenation: portable and
    # tolerant of a root with or without a trailing slash.
    txt_filename = os.path.join(root, 'ImageSets', 'Segmentation', split)
    with open(txt_filename, 'r') as f:
        images = f.read().split()
    data = [os.path.join(root, 'JPEGImages', i + '.jpg') for i in images]
    label = [os.path.join(root, 'SegmentationClass', i + '.png') for i in images]
    return data, label
def rand_crop(data, label, height, width):
    """Cut the same random height x width window out of an image and its label.

    Images vary in size, so the crop offsets are drawn per call from the
    valid range for this particular image. Returns the cropped
    (data, label) pair.
    """
    img_h, img_w, _ = data.shape
    top = random.randint(0, img_h - height)   # random vertical offset
    left = random.randint(0, img_w - width)   # random horizontal offset
    window = (slice(top, top + height), slice(left, left + width))
    return data[window], label[window]
def img_transforms(im, label, crop_size):
    """Randomly crop an (image, label) pair and convert both to tensors.

    The image becomes a CHW float tensor normalised with the ImageNet
    mean/std; the label becomes an int64 tensor of class indices.
    """
    im, label = rand_crop(im, label, *crop_size)
    normalize = tfs.Compose([
        tfs.ToTensor(),  # HWC uint8 -> CHW float tensor scaled to [0, 1]
        tfs.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),  # ImageNet stats
    ])
    return normalize(im), torch.from_numpy(image2label(label))
class VOCSegDataset(Dataset):
    """VOC segmentation dataset: pairs of RGB images and pixel-level labels."""

    def __init__(self, train, crop_size, transforms):
        """
        train      : True -> train.txt split, False -> val.txt split
        crop_size  : (height, width) every sample is cropped to
        transforms : callable (img, label, crop_size) -> (tensor, tensor)
        """
        self.crop_size = crop_size
        self.transforms = transforms
        data_list, label_list = read_images(train=train)
        # Drop images smaller than the crop size — they cannot be cropped.
        self.data_list = self._filter(data_list)
        self.label_list = self._filter(label_list)
        print('训练集和测试集: ' + str(len(self.data_list)))

    def _filter(self, images):
        """Keep only paths of images at least as large as crop_size.

        The original opened every file twice via Image.open and never
        closed either handle; open once under `with` instead.
        """
        kept = []
        for path in images:
            with Image.open(path) as img:
                w, h = img.size  # PIL .size is (width, height)
            if h >= self.crop_size[0] and w >= self.crop_size[1]:
                kept.append(path)
        return kept

    def __getitem__(self, idx):
        """Load, convert to RGB, and transform the idx-th (image, label) pair."""
        img = cv2.imread(self.data_list[idx])
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR
        label = cv2.imread(self.label_list[idx])
        label = cv2.cvtColor(label, cv2.COLOR_BGR2RGB)
        img, label = self.transforms(img, label, self.crop_size)
        return img, label

    def __len__(self):
        """Number of (image, label) pairs that survived the size filter."""
        return len(self.data_list)
################################################################
input_shape = (320, 480)  # (height, width) every sample is cropped to
voc_train = VOCSegDataset(True, input_shape, img_transforms)   # training images + labels
voc_test = VOCSegDataset(False, input_shape, img_transforms)   # validation images + labels
# (removed a no-op bare expression `voc_test.data_list` that had no effect)
train_data = DataLoader(voc_train, batch_size=6, shuffle=True)  # shuffled training batches
valid_data = DataLoader(voc_test, batch_size=6)                 # validation batches
#######################################################################
def bilinear_kernel(in_channels, out_channels, kernel_size):
    """Build ConvTranspose2d weights that perform bilinear upsampling.

    Returns a float32 tensor of shape
    (in_channels, out_channels, kernel_size, kernel_size) whose diagonal
    (channel i -> channel i) filters hold a 2-D bilinear interpolation
    kernel; all cross-channel filters are zero.
    """
    factor = (kernel_size + 1) // 2
    if kernel_size % 2 == 1:
        center = factor - 1
    else:
        center = factor - 0.5
    rows, cols = np.ogrid[:kernel_size, :kernel_size]
    # Outer product of two triangular (tent) profiles -> bilinear kernel.
    kernel_2d = ((1 - abs(rows - center) / factor)
                 * (1 - abs(cols - center) / factor))
    weight = np.zeros((in_channels, out_channels, kernel_size, kernel_size),
                      dtype='float32')
    weight[range(in_channels), range(out_channels), :, :] = kernel_2d
    return torch.from_numpy(weight)
# Backbone: ResNet-34 with ImageNet-pretrained weights.
# NOTE(review): `pretrained=True` is the legacy torchvision API; newer
# versions use `weights=` — confirm against the installed torchvision.
pretrained_net=models.resnet34(pretrained=True)
class fcn(nn.Module):
    """FCN-8s-style semantic segmentation head on the ResNet-34 backbone.

    Score maps from three backbone stages (1/8, 1/16 and 1/32 of the
    input resolution) are fused and upsampled back to full resolution
    with transposed convolutions initialised as bilinear interpolation.
    """

    def __init__(self, num_classes):
        super(fcn, self).__init__()
        # Split the pretrained backbone into three stages:
        #   stage1 -> features at 1/8  resolution (128 channels)
        #   stage2 -> features at 1/16 resolution (256 channels)
        #   stage3 -> features at 1/32 resolution (512 channels)
        self.stage1 = nn.Sequential(*list(pretrained_net.children())[:-4])
        self.stage2 = list(pretrained_net.children())[-4]
        self.stage3 = list(pretrained_net.children())[-3]
        # 1x1 convolutions mapping each stage's features to class scores.
        self.scores1 = nn.Conv2d(512, num_classes, 1)
        self.scores2 = nn.Conv2d(256, num_classes, 1)
        self.scores3 = nn.Conv2d(128, num_classes, 1)
        # Upsampling layers, initialised with a bilinear kernel.
        self.upsample_8x = nn.ConvTranspose2d(num_classes, num_classes, 16, 8, 4, bias=False)
        self.upsample_8x.weight.data = bilinear_kernel(num_classes, num_classes, 16)
        self.upsample_4x = nn.ConvTranspose2d(num_classes, num_classes, 4, 2, 1, bias=False)
        self.upsample_4x.weight.data = bilinear_kernel(num_classes, num_classes, 4)
        self.upsample_2x = nn.ConvTranspose2d(num_classes, num_classes, 4, 2, 1, bias=False)
        self.upsample_2x.weight.data = bilinear_kernel(num_classes, num_classes, 4)

    def forward(self, x):
        """Return per-pixel class scores at the input resolution."""
        x = self.stage1(x)
        s1 = x  # 1/8 resolution
        x = self.stage2(x)
        s2 = x  # 1/16 resolution
        x = self.stage3(x)
        s3 = x  # 1/32 resolution
        s3 = self.scores1(s3)
        s3 = self.upsample_2x(s3)  # 1/32 -> 1/16
        s2 = self.scores2(s2)
        s2 = s2 + s3               # fuse scores at 1/16
        s1 = self.scores3(s1)
        s2 = self.upsample_4x(s2)  # 1/16 -> 1/8
        s = s1 + s2                # fuse scores at 1/8
        # BUG FIX: the original called self.upsample_8x(s2), discarding the
        # fused `s1 + s2` — the 1/8-resolution skip connection never reached
        # the output. Upsample the fused map instead.
        s = self.upsample_8x(s)    # 1/8 -> full resolution
        return s
# 定义一些语义分割常用的指标,overal accuracy,mean IU
def _fast_hist(label_true, label_pred, n_class):
mask = (label_true >= 0) & (label_true < n_class)
hist = np.bincount(
n_class * label_true[mask].astype(int) +
label_pred[mask], minlength=n_class ** 2).reshape(n_class, n_class)
return hist
def label_accuracy_score(label_trues, label_preds, n_class):
    """Returns accuracy score evaluation result.
    - overall accuracy
    - mean accuracy
    - mean IU
    - fwavacc
    """
    hist = np.zeros((n_class, n_class))
    for truth, pred in zip(label_trues, label_preds):
        hist += _fast_hist(truth.flatten(), pred.flatten(), n_class)
    diag = np.diag(hist)
    acc = diag.sum() / hist.sum()                       # overall pixel accuracy
    acc_cls = np.nanmean(diag / hist.sum(axis=1))       # mean per-class accuracy
    union = hist.sum(axis=1) + hist.sum(axis=0) - diag  # TP + FP + FN per class
    iu = diag / union
    mean_iu = np.nanmean(iu)                            # mean intersection-over-union
    freq = hist.sum(axis=1) / hist.sum()                # per-class pixel frequency
    fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()     # frequency-weighted IU
    return acc, acc_cls, mean_iu, fwavacc
num_classes=len(classes) # 21 classes (20 VOC categories + background)
net=fcn(num_classes)
# NLLLoss pairs with the log_softmax applied to the net output below.
criterion = nn.NLLLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=1e-2, weight_decay=1e-4)
for e in range(20):  # train for 20 epochs
    print(e)
    # Running sums over the epoch; divided by dataset/loader sizes below.
    train_loss = 0
    train_acc = 0
    train_acc_cls = 0
    train_mean_iu = 0
    train_fwavacc = 0
    prev_time = time.time() # epoch start time (seconds since the Unix epoch)
    net = net.train() # switch to training mode
    net = net.cuda() # move the model to the GPU
    for data in train_data:
        # NOTE(review): Variable is a no-op wrapper in modern PyTorch —
        # data[0].cuda() alone would suffice; confirm the installed version.
        im = Variable(data[0].cuda())
        label = Variable(data[1].cuda())
        out = net(im)
        # print(out.shape)
        out = F.log_softmax(out, dim=1) # (b, n, h, w); pairs with NLLLoss
        loss = criterion(out, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.data
        # argmax over the class dimension -> predicted class per pixel
        label_pred = out.max(dim=1)[1].data.cpu().numpy()
        label_true = label.data.cpu().numpy()
        for lbt, lbp in zip(label_true, label_pred):
            # Metrics are accumulated per image, so averages below divide
            # by the number of images, not the number of batches.
            acc, acc_cls, mean_iu, fwavacc = label_accuracy_score(lbt, lbp, num_classes)
            train_acc += acc
            train_acc_cls += acc_cls
            train_mean_iu += mean_iu
            train_fwavacc += fwavacc
    # Switch from training to evaluation mode for validation.
    net = net.eval()
    eval_loss = 0
    eval_acc = 0
    eval_acc_cls = 0
    eval_mean_iu = 0
    eval_fwavacc = 0
    for data in valid_data:
        with torch.no_grad():  # no gradients needed for validation
            im = Variable(data[0].cuda())
            label = Variable(data[1].cuda())
            # forward
            out = net(im)
            out = F.log_softmax(out, dim=1)
            loss = criterion(out, label)
            eval_loss += loss.data
            label_pred = out.max(dim=1)[1].data.cpu().numpy()
            label_true = label.data.cpu().numpy()
            for lbt, lbp in zip(label_true, label_pred):
                acc, acc_cls, mean_iu, fwavacc = label_accuracy_score(lbt, lbp, num_classes)
                eval_acc += acc
                eval_acc_cls += acc_cls
                eval_mean_iu += mean_iu
                eval_fwavacc += fwavacc
    cur_time = time.time()
    # The backslash continues the string literal onto the next physical line.
    epoch_str = ('Epoch: {}, Train Loss: {:.5f}, Train Acc: {:.5f}, Train Mean IU: {:.5f}, \
Valid Loss: {:.5f}, Valid Acc: {:.5f}, Valid Mean IU: {:.5f} '.format(
        e, train_loss / len(train_data), train_acc / len(voc_train), train_mean_iu / len(voc_train),
        eval_loss / len(valid_data), eval_acc / len(voc_test), eval_mean_iu / len(voc_test)))
    print(epoch_str)
data,label=read_images() # image/label paths of the training split
cm = np.array(colormap).astype('uint8') # class index -> RGB colour, for visualisation
def predict(im, label):
    """Run the network on one image tensor and colour-code the result.

    Returns (pred_rgb, label_rgb): both are H x W x 3 uint8 arrays built
    by indexing the class colour map `cm` with class indices.
    """
    im = Variable(im.unsqueeze(0)).cuda()  # add a batch dimension, move to GPU
    scores = net(im)
    pred_idx = scores.max(1)[1].squeeze().cpu().data.numpy()  # per-pixel argmax
    return cm[pred_idx], cm[label.numpy()]
# Visualise 12 validation samples: original image / ground truth / prediction.
_, figs = plt.subplots(12, 3, figsize=(12, 10))
for i in range(12):
    print(i)
    test_data, test_label = voc_test[i]
    # print(test_data.shape) # torch.Size([3, 320, 480])
    # print(test_label.shape) # torch.Size([320, 480])
    pred, label = predict(test_data, test_label)
    figs[i, 0].imshow(Image.open(voc_test.data_list[i]))  # column 0: original image
    figs[i, 0].axes.get_xaxis().set_visible(False)
    figs[i, 0].axes.get_yaxis().set_visible(False)
    figs[i, 1].imshow(label)  # column 1: colour-coded ground-truth mask
    figs[i, 1].axes.get_xaxis().set_visible(False)
    figs[i, 1].axes.get_yaxis().set_visible(False)
    figs[i, 2].imshow(pred)   # column 2: colour-coded prediction
    figs[i, 2].axes.get_xaxis().set_visible(False)
    figs[i, 2].axes.get_yaxis().set_visible(False)
plt.show()
print("over")
# Results after 10 training epochs: