基础概念
语义分割算法的核心:像素级分类
语义分割算法基本流程
- 输入:图像(RGB)
- 算法:深度学习模型
- 输出:分类结果(与输入大小一致的单通道图)
- 训练过程:
- 输入: image + label
- 前向: out = model(image)
- 计算损失: loss = loss_func(out,label)
- 反向: loss.backward()
- 更新权重:optimizer.minimize(loss)
评价指标
mAcc
结果样式:
实际计算流程:
mIOU
其中mean体现在对多个类别的IOU最后求平均.
两个评价指标都是越大越好.
具体实现流程
图(数据预处理相关)
import random
import cv2
import numpy as np
import paddle.fluid as fluid
import os
class Transform(object):
    """Resize an (image, label) pair to a fixed square size.

    The image is resized with bilinear interpolation; the label map is
    resized with nearest-neighbour interpolation so interpolation cannot
    invent class ids that do not exist in the ground truth.
    """

    def __init__(self, size=256):
        # Side length of the square output.
        self.size = size

    def __call__(self, input, label):
        input = cv2.resize(input, (self.size, self.size),
                           interpolation=cv2.INTER_LINEAR)
        # BUG FIX: the original resized `input` a second time and returned
        # the image as the label; resize `label` instead, and use
        # INTER_NEAREST so class ids stay intact.
        label = cv2.resize(label, (self.size, self.size),
                           interpolation=cv2.INTER_NEAREST)
        return input, label
class BasicDataLoader(object):
    """Generator-style dataset for (image, label) segmentation pairs.

    Reads a list file where each line holds two whitespace-separated
    relative paths (image, label). Calling the instance yields one
    preprocessed (image, label) pair at a time, suitable for
    `fluid.io.DataLoader.set_sample_generator`.
    """

    def __init__(self,
                 image_folder,
                 image_list_file,
                 transform=None,
                 shuffle=True):
        # Root folder the relative paths in the list file are joined to.
        self.image_folder = image_folder
        self.image_list_file = image_list_file
        # Optional callable: (image, label) -> (image, label).
        self.transform = transform
        self.shuffle = shuffle
        self.data_list = self.read_list()

    def read_list(self):
        """Parse the list file into [(image_path, label_path), ...]."""
        data_list = []
        with open(self.image_list_file) as infile:
            for line in infile:
                parts = line.split()
                data_path = os.path.join(self.image_folder, parts[0])
                label_path = os.path.join(self.image_folder, parts[1])
                data_list.append((data_path, label_path))
        # BUG FIX: the original shuffled unconditionally; honor the
        # `shuffle` flag so shuffle=False keeps deterministic order.
        if self.shuffle:
            random.shuffle(data_list)
        return data_list

    def preprocess(self, data, label):
        """Check image/label sizes agree, apply transform, add channel dim."""
        h, w, c = data.shape
        h_gt, w_gt = label.shape
        assert h == h_gt, "image/label height mismatch"
        assert w == w_gt, "image/label width mismatch"
        if self.transform:
            data, label = self.transform(data, label)
        # HxW -> HxWx1 so the label has an explicit channel axis.
        label = label[:, :, np.newaxis]
        return data, label

    def __len__(self):
        return len(self.data_list)

    def __call__(self):
        # Iterate the list, loading image as RGB and label as grayscale.
        for data_path, label_path in self.data_list:
            data = cv2.imread(data_path, cv2.IMREAD_COLOR)
            data = cv2.cvtColor(data, cv2.COLOR_BGR2RGB)
            label = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE)
            data, label = self.preprocess(data, label)
            yield data, label
def main():
    """Smoke-test BasicDataLoader through paddle's generator-based loader."""
    batch_size = 5
    place = fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        # Python-side sample generator with a fixed 256x256 resize.
        basic_dataloader = BasicDataLoader(
            image_folder='./dummy_data',
            image_list_file='./dummy_data/list.txt',
            transform=Transform(256),
            shuffle=True)
        # Wrap it in a fluid DataLoader so paddle performs the batching,
        # then attach our generator to it.
        dataloader = fluid.io.DataLoader.from_generator(capacity=1,
                                                        use_multiprocess=False)
        dataloader.set_sample_generator(basic_dataloader,
                                        batch_size=batch_size,
                                        places=place)
        num_epoch = 2
        for epoch in range(1, num_epoch + 1):
            print(f'Epoch [{epoch}/{num_epoch}]:')
            for idx, (data, label) in enumerate(dataloader):
                print(f'iter {idx}, Data shape: {data.shape}, Label shape:{label.shape}')

if __name__ == '__main__':
    main()
其中数据变换时,我们会额外定义出一个新的类来,执行翻转,裁剪,补边等常见的数据预处理操作.
import cv2
import numpy as np
import random
class Compose(object):
    """Chain several (image, label) transforms into a single callable."""

    def __init__(self, transforms):
        # Ordered list of callables, each taking and returning (image, label).
        self.transforms = transforms

    def __call__(self, image, label=None):
        for transform in self.transforms:
            image, label = transform(image, label)
        return image, label
class Normalize(object):
    """Standardize an image: scale toward unit range, subtract mean, divide by std."""

    def __init__(self, mean_val, std_val, val_scale=1):
        # set val_scale = 1 if mean and std are in range (0,1)
        # set val_scale to other value, if mean and std are in range (0,255)
        self.mean = np.array(mean_val, dtype=np.float32)
        self.std = np.array(std_val, dtype=np.float32)
        self.val_scale = 1 / 255.0 if val_scale == 1 else 1

    def __call__(self, image, label=None):
        # Scale pixel values, then standardize channel-wise.
        scaled = image.astype(np.float32) * self.val_scale
        normalized = (scaled - self.mean) * (1 / self.std)
        return normalized, label
class ConvertDataType(object):
    """Cast the image to float32 and, when present, the label to int64."""

    def __call__(self, image, label=None):
        casted_label = label if label is None else label.astype(np.int64)
        return image.astype(np.float32), casted_label
# 增加边框,size指定为一个int类型,确定增加后图像的尺寸,方形;
# 若指定为一个tuple或list则宽高分别为list的值
class Pad(object):
    """Pad image/label up to a target size with constant borders.

    `size` is an int (square target) or a (height, width) pair. The image
    border is filled with `mean_val` (converted to the 0-255 range when
    `val_scale` is 1) and the label border with `ignore_label`.
    """

    def __init__(self, size, ignore_label=255, mean_val=0, val_scale=1):
        # set val_scale to 1 if mean_val is in range (0, 1)
        # set val_scale to 255 if mean_val is in range (0, 255)
        factor = 255 if val_scale == 1 else 1
        if isinstance(size, int):
            self.size_height = self.size_width = size
        else:
            self.size_height, self.size_width = size[0], size[1]
        self.ignore_label = ignore_label
        # Convert mean_val from the 0-1 range to 0-255 when required.
        if isinstance(mean_val, (tuple, list)):
            self.mean_val = [int(v * factor) for v in mean_val]
        else:
            self.mean_val = int(mean_val * factor)

    def __call__(self, image, label=None):
        h, w, c = image.shape
        pad_h = max(self.size_height - h, 0)
        pad_w = max(self.size_width - w, 0)
        # Split the padding as evenly as possible between the two sides.
        top = pad_h // 2
        left = pad_w // 2
        if pad_h > 0 or pad_w > 0:
            image = cv2.copyMakeBorder(image,
                                       top=top,
                                       left=left,
                                       bottom=pad_h - top,
                                       right=pad_w - left,
                                       borderType=cv2.BORDER_CONSTANT,
                                       value=self.mean_val)
            if label is not None:
                label = cv2.copyMakeBorder(label,
                                           top=top,
                                           left=left,
                                           bottom=pad_h - top,
                                           right=pad_w - left,
                                           borderType=cv2.BORDER_CONSTANT,
                                           value=self.ignore_label)
        return image, label
# 输入为一个int类型的整数,或者元组,列表
class CenterCrop(object):
    """Crop the central region of the given output size (int or (h, w))."""

    def __init__(self, output_size):
        if isinstance(output_size, int):
            output_size = (output_size, output_size)
        self.output_size = output_size

    def _get_params(self, img):
        # Top-left corner (x, y) of the centered crop window.
        th, tw = self.output_size
        h, w, _ = img.shape
        assert th <= h and tw <= w, "output size is bigger than image size"
        return int(round((w - tw) / 2.0)), int(round((h - th) / 2.0))

    def __call__(self, img, label=None):
        x, y = self._get_params(img)
        th, tw = self.output_size
        cropped = img[y:y + th, x:x + tw]
        if label is None:
            return cropped, None
        return cropped, label[y:y + th, x:x + tw]
# 缩放图像,输入尺寸可以是一个int类型,或一个tuple或list
class Resize(object):
    """Resize image (and label) to `size` (int or a (w, h) pair as cv2 expects)."""

    def __init__(self, size, interpolation=1):
        if isinstance(size, int):
            self.size = (size, size)
        else:
            self.size = size
        # cv2 interpolation flag (1 == cv2.INTER_LINEAR).
        self.interpolation = interpolation

    def __call__(self, img, label=None):
        # BUG FIX: cv2.resize's third positional parameter is `dst`, not
        # `interpolation` -- the flag must be passed by keyword, otherwise
        # the call errors out (or silently ignores the intended mode).
        img = cv2.resize(img, self.size, interpolation=self.interpolation)
        if label is not None:
            label = cv2.resize(label, self.size, interpolation=self.interpolation)
        return img, label
# 随机翻转,code=0 垂直翻转,code=1 水平翻转,code=-1 水平垂直翻转
class RandomFlip(object):
    """With probability 0.5, flip image/label (code 0: vertical, 1: horizontal, -1: both)."""

    def __init__(self, code=0):
        # Flip probability is fixed at 50%.
        self.prob = 0.5
        self.code = code

    def __call__(self, img, label=None):
        if np.random.random() >= self.prob:
            # No flip this time; pass everything through unchanged.
            return img, label
        flipped_img = cv2.flip(img, self.code)
        flipped_label = label if label is None else cv2.flip(label, self.code)
        return flipped_img, flipped_label
# 随机裁剪,输入尺寸,在图片上随机区域裁剪出指定大小图片
# 输入类型为int,tuple,list
class RandomCrop(object):
    """Crop a random (img_width x img_height) window out of the image.

    `img_size` is an int (square crop) or a (width, height) pair.
    """

    def __init__(self, img_size):
        if isinstance(img_size, int):
            self.img_width, self.img_height = img_size, img_size
        else:
            self.img_width, self.img_height = img_size[0], img_size[1]

    def __call__(self, img, label=None):
        return self.Random_crop(img, label)

    def Random_crop(self, img, label):
        height, width, _ = img.shape
        width_range = width - self.img_width
        height_range = height - self.img_height
        assert width_range >= 0 and height_range >= 0, \
            "crop size is bigger than image size"
        # BUG FIX: np.random.randint(0) raises ValueError when the crop size
        # equals the image size; use randint(range + 1) so the offset covers
        # the full [0, range] interval (the original also never produced the
        # right/bottom-most crop position).
        random_ws = np.random.randint(width_range + 1)
        random_hs = np.random.randint(height_range + 1)
        random_wd = self.img_width + random_ws
        random_hd = self.img_height + random_hs
        img = img[random_hs:random_hd, random_ws:random_wd]
        if label is not None:
            label = label[random_hs:random_hd, random_ws:random_wd]
        return img, label
# 缩放,输入为一个float类型
class Scale(object):
    """Resize image (and label) by a fixed float `ratio`."""

    def __init__(self, ratio, interpolation=1):
        self.ratio = ratio
        # cv2 interpolation flag (1 == cv2.INTER_LINEAR).
        self.interpolation = interpolation

    def __call__(self, img, label=None):
        # img.shape is (rows, cols, channels); cv2.resize wants
        # dsize=(cols, rows), hence the (w, h) order below. (The original
        # bound rows to a variable named `width`, which obscured this.)
        h, w, _ = img.shape
        dsize = (int(w * self.ratio), int(h * self.ratio))
        # BUG FIX: interpolation must be a keyword argument -- the third
        # positional parameter of cv2.resize is `dst`, not `interpolation`.
        img = cv2.resize(img, dsize, interpolation=self.interpolation)
        if label is not None:
            label = cv2.resize(label, dsize, interpolation=self.interpolation)
        return img, label
# 随机缩放,输入为一个float类型,或tuple,list
class RandomScale(object):
    """Resize image/label by a ratio drawn uniformly from `range_data`.

    `range_data` may be a single number (fixed ratio) or a (low, high)
    pair, in which case a fresh ratio is sampled on every call.
    """

    def __init__(self, range_data, interpolation=1):
        self.range_data = range_data
        # cv2 interpolation flag (1 == cv2.INTER_LINEAR).
        self.interpolation = interpolation

    def _sample_ratio(self):
        # BUG FIX: the original drew the ratio once in __init__, so a reused
        # instance applied the same "random" scale on every call; sample
        # per call instead.
        if isinstance(self.range_data, (int, float)):
            return self.range_data
        return random.uniform(self.range_data[0], self.range_data[1])

    def __call__(self, img, label=None):
        ratio = self._sample_ratio()
        # img.shape is (rows, cols, channels); cv2.resize wants dsize=(cols, rows).
        h, w, _ = img.shape
        dsize = (int(w * ratio), int(h * ratio))
        # BUG FIX: interpolation must be passed by keyword (the third
        # positional parameter of cv2.resize is `dst`).
        img = cv2.resize(img, dsize, interpolation=self.interpolation)
        if label is not None:
            label = cv2.resize(label, dsize, interpolation=self.interpolation)
        return img, label
def main():
    """Visual check of the augmentation ops on one sample image."""
    image = cv2.imread('./work/dummy_data/JPEGImages/2008_000064.jpg')
    label = cv2.imread('./work/dummy_data/GroundTruth_trainval_png/2008_000064.png')
    # A single random crop, saved for inspection.
    cropped, _ = RandomCrop((300, 200))(image)
    cv2.imwrite('RandomCrop.png', cropped)
    # Full pipeline: RandomScale -> RandomFlip -> Pad -> RandomCrop.
    augmented, _ = RandomScale((0.5, 3))(image)
    augmented, _ = RandomFlip(0)(augmented)
    augmented, _ = Pad(700)(augmented)
    augmented, _ = RandomCrop((400, 300))(augmented)
    cv2.imwrite('Transfoimgrm.png', augmented)
    # Save ten independently augmented variants of the same image.
    for i in range(10):
        img, _ = RandomScale((0.5, 3))(image)
        img, _ = RandomFlip(0)(img)
        img, _ = Pad((700, 700))(img)
        img, _ = RandomCrop((400, 300))(img)
        cv2.imwrite('Transform_{}.png'.format(i + 1), img)
        print('Transform_{}.png'.format(i + 1) + ' has been saved to disk')

if __name__ == "__main__":
    main()
网(深度学习网络搭建)
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import Conv2D,Pool2D # layers used by BasicModel
from paddle.fluid.dygraph import base # base.to_variable: numpy array -> dygraph variable
import numpy as np
np.set_printoptions(precision=2) # print floats with 2-decimal precision
class BasicModel(fluid.dygraph.Layer):
    """Minimal segmentation model: 4x4 max-pool -> 3x3 conv -> upsample back.

    Takes an NCHW RGB tensor and returns a tensor with the same H x W as
    the input but `num_classes` channels (one score map per class).
    """
    # Mirrors the usual pytorch workflow: declare the layers in __init__,
    # then wire them together in forward().
    def __init__(self, num_classes=59):
        super(BasicModel, self).__init__()
        # Pool2D defaults to max pooling; stride 4 shrinks H and W by 4x.
        self.pool1 = Pool2D(pool_size = 4,pool_stride = 4)
        # 3x3 conv with padding=1 keeps the feature-map size unchanged;
        # maps the 3 RGB input channels to num_classes output channels.
        self.conv2 = Conv2D(3,num_classes,3,padding=1)
    def forward(self, inputs):
        x = self.pool1(inputs)
        x = self.conv2(x)
        # Upsample back to the input's H x W so the output matches the label size.
        x = fluid.layers.interpolate(x, out_shape=(inputs.shape[2], inputs.shape[3]))
        return x
def main():
    """Run a random tensor through BasicModel and print the output shape."""
    place = paddle.fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        model = BasicModel(num_classes=59)
        # Inference mode: disables dropout/batch-norm training behavior.
        model.eval()
        dummy = np.random.uniform(-1, 1, [1, 3, 8, 8]).astype('float32')
        print('Input data shape: ', dummy.shape)
        # numpy -> dygraph variable -> model -> back to numpy.
        output = model(base.to_variable(dummy)).numpy()
        print('Output data shape: ', output.shape)

if __name__ == "__main__":
    main()
训(网络训练测试)
计算loss
def Basic_SegLoss(preds, labels, ignore_index=255):
    """Masked softmax cross-entropy loss for semantic segmentation.

    Args:
        preds: NCHW logits tensor.
        labels: integer label tensor matching preds' spatial layout.
        ignore_index: label value excluded from the loss (default 255).

    Returns:
        Scalar average loss over the non-ignored pixels.
    """
    # BUG FIX: `eps` was referenced below but never defined in the
    # original (NameError at runtime). It guards against division by
    # zero when every pixel is ignored.
    eps = 1e-8
    n, c, h, w = preds.shape
    # softmax_with_cross_entropy expects channels last: NCHW -> NHWC.
    preds = fluid.layers.transpose(preds, [0, 2, 3, 1])
    # 1.0 where the pixel participates in the loss, 0.0 where ignored.
    mask = labels != ignore_index
    mask = fluid.layers.cast(mask, 'float32')
    loss = fluid.layers.softmax_with_cross_entropy(preds, labels)
    loss = loss * mask
    # Normalize by the fraction of valid pixels so ignored pixels do not
    # dilute the average.
    avg_loss = fluid.layers.mean(loss) / (fluid.layers.mean(mask) + eps)
    return avg_loss
定义单次训练流程
def train(dataloader, model, criterion, optimizer, epoch, total_batch):
    """Train `model` for one epoch and return the epoch's average loss.

    NOTE(review): relies on the module-level `args` (for num_epochs) and
    on `AverageMeter` from utils; both must be in scope at the call site.
    """
    model.train()
    train_loss_meter = AverageMeter()
    for batch_id, data in enumerate(dataloader):
        image = data[0]
        label = data[1]
        # The dataloader yields NHWC; the model expects NCHW.
        image = fluid.layers.transpose(image, (0, 3, 1, 2))
        pred = model(image)
        loss = criterion(pred, label)
        # Backward pass, parameter update, then manual gradient reset
        # (fluid dygraph does not clear gradients automatically).
        loss.backward()
        optimizer.minimize(loss)
        model.clear_gradients()
        n = image.shape[0]
        # Weight the running average by the batch size.
        train_loss_meter.update(loss.numpy()[0], n)
        print(f"Epoch[{epoch:03d}/{args.num_epochs:03d}], " +
              f"Step[{batch_id:04d}/{total_batch:04d}], " +
              f"Average Loss: {train_loss_meter.avg:4f}")
    return train_loss_meter.avg
串成完整的训练脚本
import os
import paddle
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
import numpy as np
import argparse
from utils import AverageMeter
from basic_model import BasicModel
from basic_dataloader import BasicDataLoader
from basic_seg_loss import Basic_SegLoss
from basic_data_preprocessing import TrainAugmentation
# Command-line configuration for the training script.
parser = argparse.ArgumentParser()
parser.add_argument('--net', type=str, default='basic')  # model architecture name
parser.add_argument('--lr', type=float, default=0.001)  # learning rate for Adam
parser.add_argument('--num_epochs', type=int, default=10)
parser.add_argument('--batch_size', type=int, default=4)
parser.add_argument('--image_folder', type=str, default='./work/dummy_data')
parser.add_argument('--image_list_file', type=str, default='./work/dummy_data/list.txt')
parser.add_argument('--checkpoint_folder', type=str, default='./output')  # checkpoint output dir
parser.add_argument('--save_freq', type=int, default=2)  # save a checkpoint every N epochs
args = parser.parse_args()
def main():
    """End-to-end training: build dataloader, model, loss/optimizer, then loop."""
    # Step 0: preparation
    place = paddle.fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        # Step 1: Define training dataloader
        basic_augmentation = TrainAugmentation(image_size=256)
        basic_dataloader = BasicDataLoader(image_folder=args.image_folder,
                                           image_list_file=args.image_list_file,
                                           transform=basic_augmentation,
                                           shuffle=True)
        train_dataloader = fluid.io.DataLoader.from_generator(capacity=10,
                                                              use_multiprocess=True)
        train_dataloader.set_sample_generator(basic_dataloader,
                                              batch_size=args.batch_size,
                                              places=place)
        # Number of full batches per epoch (any remainder is dropped).
        total_batch = int(len(basic_dataloader) / args.batch_size)
        # Step 2: Create model
        if args.net == "basic":
            model = BasicModel()
        else:
            raise NotImplementedError(f"args.net: {args.net} is not Supported!")
        # Step 3: Define criterion and optimizer
        criterion = Basic_SegLoss
        # create optimizer
        optimizer = AdamOptimizer(learning_rate=args.lr,
                                  parameter_list=model.parameters())
        # Step 4: Training
        for epoch in range(1, args.num_epochs+1):
            train_loss = train(train_dataloader,
                               model,
                               criterion,
                               optimizer,
                               epoch,
                               total_batch)
            print(f"----- Epoch[{epoch}/{args.num_epochs}] Train Loss: {train_loss:.4f}")
            # Checkpoint every `save_freq` epochs and at the final epoch.
            if epoch % args.save_freq == 0 or epoch == args.num_epochs:
                model_path = os.path.join(args.checkpoint_folder, f"{args.net}-Epoch-{epoch}-Loss-{train_loss}")
                # save model and optimizer states
                model_dict = model.state_dict()
                fluid.save_dygraph(model_dict, model_path)
                optimizer_dict = optimizer.state_dict()
                fluid.save_dygraph(optimizer_dict, model_path)
                print(f'----- Save model: {model_path}.pdparams')
                print(f'----- Save optimizer: {model_path}.pdopt')

if __name__ == "__main__":
    main()