10 篇文章 1 订阅
3 篇文章 0 订阅

# 基础概念

## 语义分割算法基本流程

1. 输入:图像(RGB)
2. 算法:深度学习模型
3. 输出:分类结果(与输入大小一致的单通道图)
4. 训练过程:
   1. 输入:image + label
   2. 前向:out = model(image)
   3. 计算损失:loss = loss_func(out, label)
   4. 反向:loss.backward()
   5. 更新权重:optimizer.minimize(loss)

# 具体实现流程

## 图(数据预处理相关)

import random
import cv2
import numpy as np
import os

class Transform(object):
    """Resize an image/label pair to a fixed square size.

    Args:
        size: Edge length, in pixels, of the square output (default 256).
    """

    def __init__(self, size=256):
        self.size = size

    def __call__(self, input, label):
        """Return ``(input, label)``, both resized to ``size x size``."""
        target = (self.size, self.size)
        input = cv2.resize(input, target, interpolation=cv2.INTER_LINEAR)
        # The label has to be resized from the label itself (the previous code
        # resized the image a second time), and with nearest-neighbour
        # sampling so that class ids are never blended into new values.
        label = cv2.resize(label, target, interpolation=cv2.INTER_NEAREST)
        return input, label

class BasicDataLoader(object):
    """Sample generator yielding ``(image, label)`` pairs listed in a text file.

    NOTE(review): the ``class`` line was missing from this listing; the name
    ``BasicDataLoader`` is an assumption -- confirm against the callers.

    Args:
        image_folder: Directory that the paths in the list file are joined to.
        image_list_file: Text file with one sample per line, formatted as
            ``<image path> <label path>`` separated by whitespace.
        transform: Optional callable ``transform(data, label)`` returning the
            transformed ``(data, label)`` pair.
        shuffle: Whether to shuffle the sample list once at construction.
    """

    def __init__(self,
                 image_folder,
                 image_list_file,
                 transform=None,
                 shuffle=True):
        self.image_folder = image_folder
        self.image_list_file = image_list_file
        self.transform = transform
        self.shuffle = shuffle

        # __len__ and __call__ both read self.data_list, so it must be stored
        # here (__init__ itself is not allowed to return a value).
        self.data_list = self.read_list()

    def read_list(self):
        """Parse the list file into ``[(image_path, label_path), ...]``."""
        data_list = []
        with open(self.image_list_file) as infile:
            for line in infile:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                data_path = os.path.join(self.image_folder, fields[0])
                label_path = os.path.join(self.image_folder, fields[1])
                data_list.append((data_path, label_path))
        if self.shuffle:
            random.shuffle(data_list)
        return data_list

    def preprocess(self, data, label):
        """Check that sizes agree, apply the transform, add a label channel.

        Args:
            data: Image array whose first two axes are height and width.
            label: 2-D label array with the same height and width as ``data``.

        Returns:
            ``(data, label)`` where ``label`` has a trailing axis of length 1.
        """
        h, w = data.shape[:2]
        h_gt, w_gt = label.shape
        assert h == h_gt, "Error"
        assert w == w_gt, "Error"
        if self.transform:
            data, label = self.transform(data, label)
        label = label[:, :, np.newaxis]
        return data, label

    def __len__(self):
        """Return the number of samples in the list."""
        return len(self.data_list)

    def __call__(self):
        """Yield preprocessed ``(data, label)`` pairs, one per listed sample."""
        for data_path, label_path in self.data_list:
            data = cv2.imread(data_path, cv2.IMREAD_COLOR)
            label = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE)
            # cv2.imread signals failure by returning None instead of raising.
            if data is None or label is None:
                raise IOError(
                    "cannot read sample: {} / {}".format(data_path, label_path))
            # OpenCV loads colour images as BGR; convert to RGB.
            data = cv2.cvtColor(data, cv2.COLOR_BGR2RGB)
            data, label = self.preprocess(data, label)

            yield data, label

def main():
    """Feed the dummy dataset through a fluid DataLoader and print batch shapes."""
    # Imported locally so this demo does not rely on a file-level import.
    import paddle.fluid as fluid

    batch_size = 5
    place = fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        transform = Transform(256)
        # NOTE(review): the statement that creates the dataset object was
        # missing from this listing; ``BasicDataLoader`` is the assumed name
        # of the sample-generator class defined in this script -- confirm.
        basic_dataloader = BasicDataLoader(
            image_folder='./dummy_data',
            image_list_file='./dummy_data/list.txt',
            transform=transform,
            shuffle=True,
        )

        # Create the fluid dataloader, then attach our sample generator to it.
        # NOTE(review): capacity / use_multiprocess were not visible in the
        # listing; the values below are conservative choices -- confirm.
        dataloader = fluid.io.DataLoader.from_generator(capacity=1,
                                                        use_multiprocess=False)
        dataloader.set_sample_generator(basic_dataloader,
                                        batch_size=batch_size,
                                        places=place)

        num_epoch = 2
        for epoch in range(1, num_epoch+1):
            print(f'Epoch [{epoch}/{num_epoch}]:')
            for idx, (data, label) in enumerate(dataloader):
                print(f'iter {idx}, Data shape: {data.shape}, Label shape:{label.shape}')


if __name__ == '__main__':
    main()

import cv2
import numpy as np
import random

class Compose(object):
    """Chain several ``(image, label)`` transforms into a single callable.

    Args:
        transforms: Iterable of callables, each taking ``(image, label)`` and
            returning a 2-element ``(image, label)`` result.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, label=None):
        """Apply every transform in order and return the final pair."""
        current_image, current_label = image, label
        for step in self.transforms:
            current_image, current_label = step(current_image, current_label)
        return current_image, current_label

class Normalize(object):
    """Scale an image, subtract the mean and divide by the std.

    Args:
        mean_val: Mean value(s), converted to a float32 array.
        std_val: Standard deviation value(s), converted to a float32 array.
        val_scale: Pass 1 when mean/std are expressed in the (0, 1) range, so
            the image is first multiplied by 1/255; any other value leaves
            the image unscaled (mean/std expressed in the (0, 255) range).
    """

    def __init__(self, mean_val, std_val, val_scale=1):
        self.mean = np.array(mean_val, dtype=np.float32)
        self.std = np.array(std_val, dtype=np.float32)
        if val_scale == 1:
            self.val_scale = 1/255.0
        else:
            self.val_scale = 1

    def __call__(self, image, label=None):
        """Return the normalised float32 image and the untouched label."""
        scaled = image.astype(np.float32) * self.val_scale
        centered = scaled - self.mean
        normalized = centered * (1 / self.std)
        return normalized, label

class ConvertDataType(object):
    """Cast the image to float32 and, when present, the label to int64."""

    def __call__(self, image, label=None):
        """Return ``(image as float32, label as int64 or None)``."""
        converted_label = None if label is None else label.astype(np.int64)
        return image.astype(np.float32), converted_label

# Pad an image (and its label) with a constant border up to a target size.
# ``size`` is either an int (square target) or a tuple/list read as
# (height, width).
class Pad(object):
    """Grow an image to at least ``size`` by adding a constant border.

    Args:
        size: Target size; an int for a square, otherwise ``size[0]`` is the
            height and ``size[1]`` the width.
        ignore_label: Value used to fill the border of the label.
        mean_val: Value (or per-channel tuple/list) used to fill the border of
            the image.
        val_scale: Pass 1 when ``mean_val`` is expressed in the (0, 1) range
            (it is then multiplied by 255); any other value keeps ``mean_val``
            as given.
    """

    def __init__(self, size, ignore_label=255, mean_val=0, val_scale=1):
        factor = 255 if val_scale == 1 else 1

        if isinstance(size, int):
            self.size_height, self.size_width = size, size
        else:
            self.size_height, self.size_width = size[0], size[1]
        self.ignore_label = ignore_label
        self.mean_val = mean_val
        # Bring the fill value into the 0-255 range and make it an integer.
        if isinstance(self.mean_val, (tuple, list)):
            self.mean_val = [int(x * factor) for x in self.mean_val]
        else:
            self.mean_val = int(self.mean_val * factor)

    def __call__(self, image, label=None):
        """Return ``(image, label)`` padded symmetrically to the target size.

        Inputs that are already at least as large as the target are returned
        unchanged.
        """
        h, w = image.shape[:2]
        pad_h = max(self.size_height - h, 0)
        pad_w = max(self.size_width - w, 0)
        if pad_h == 0 and pad_w == 0:
            return image, label

        # Split the padding between both sides; the odd pixel, if any, goes
        # to the bottom/right edge.
        top = pad_h // 2
        bottom = pad_h - top
        left = pad_w // 2
        right = pad_w - left

        image = cv2.copyMakeBorder(image, top, bottom, left, right,
                                   borderType=cv2.BORDER_CONSTANT,
                                   value=self.mean_val)
        if label is not None:
            label = cv2.copyMakeBorder(label, top, bottom, left, right,
                                       borderType=cv2.BORDER_CONSTANT,
                                       value=self.ignore_label)
        return image, label

# Centre crop; ``output_size`` is an int, or a tuple/list unpacked as
# (height, width).
class CenterCrop(object):
    """Cut a window of ``output_size`` out of the centre of an image."""

    def __init__(self, output_size):
        is_square = isinstance(output_size, int)
        self.output_size = (output_size, output_size) if is_square else output_size

    def _get_params(self, img):
        """Return the ``(x, y)`` top-left corner of the centred window."""
        target_h, target_w = self.output_size
        img_h, img_w, _ = img.shape
        assert target_h <= img_h and target_w <= img_w, "output size is bigger than image size"
        left = int(round((img_w - target_w) / 2.0))
        top = int(round((img_h - target_h) / 2.0))
        return left, top

    def __call__(self, img, label=None):
        """Return the cropped image and, when given, the cropped label."""
        left, top = self._get_params(img)
        crop_h, crop_w = self.output_size
        rows = slice(top, top + crop_h)
        cols = slice(left, left + crop_w)
        cropped = img[rows, cols]
        if label is None:
            return cropped, label
        return cropped, label[rows, cols]

# Resize an image (and label); ``size`` is an int, or a tuple/list that is
# handed to cv2.resize as its ``dsize`` argument.
class Resize(object):
    """Resize an image/label pair to a fixed size.

    Args:
        size: An int for a square output, otherwise the ``dsize`` passed to
            ``cv2.resize`` (OpenCV reads it as ``(width, height)``).
        interpolation: OpenCV interpolation flag used for the image
            (default 1, i.e. ``cv2.INTER_LINEAR``).
    """

    def __init__(self, size, interpolation=1):
        if isinstance(size, int):
            self.size = (size, size)
        else:
            self.size = size
        self.interpolation = interpolation

    def __call__(self, img, label=None):
        """Return the resized image and, when given, the resized label."""
        # ``interpolation`` must be passed by keyword: the third positional
        # parameter of cv2.resize is ``dst``, not the interpolation flag.
        img = cv2.resize(img, self.size, interpolation=self.interpolation)
        if label is not None:
            # Nearest-neighbour keeps label values valid class ids.
            label = cv2.resize(label, self.size,
                               interpolation=cv2.INTER_NEAREST)
        return img, label

# Random flip. code=0 flips vertically, code=1 flips horizontally,
# code=-1 flips both ways.
class RandomFlip(object):
    """Flip an image/label pair with a given probability.

    Args:
        code: Flip code forwarded to ``cv2.flip``.
        prob: Probability of flipping on each call (default 0.5, the value
            that used to be hard-coded).
    """

    def __init__(self, code=0, prob=0.5):
        self.prob = prob
        self.code = code

    def __call__(self, img, label=None):
        """Return the pair, flipped together or left untouched together."""
        if np.random.random() >= self.prob:
            return img, label
        img = cv2.flip(img, self.code)
        if label is not None:
            label = cv2.flip(label, self.code)
        return img, label

# Random crop: cut a window of the requested size from a random position.
# ``img_size`` is an int, or a tuple/list read as (width, height).
class RandomCrop(object):
    """Crop a randomly placed window out of an image/label pair.

    Args:
        img_size: An int for a square window, otherwise ``img_size[0]`` is
            the window width and ``img_size[1]`` the window height.
    """

    def __init__(self, img_size):
        if isinstance(img_size, int):
            self.img_width, self.img_height = img_size, img_size
        else:
            self.img_width, self.img_height = img_size[0], img_size[1]

    def __call__(self, img, label=None):
        return self.Random_crop(img, label)

    def Random_crop(self, img, label):
        """Return the cropped image and, when given, the cropped label.

        Raises:
            ValueError: If the image is smaller than the crop window.
        """
        height, width = img.shape[:2]
        width_range = width - self.img_width
        height_range = height - self.img_height
        if width_range < 0 or height_range < 0:
            raise ValueError(
                "crop size ({}, {}) is larger than image size ({}, {})".format(
                    self.img_width, self.img_height, width, height))
        # randint's upper bound is exclusive: ``+ 1`` makes the last valid
        # offset reachable and lets an image of exactly the crop size through
        # (randint(0) raises).
        random_ws = np.random.randint(0, width_range + 1)
        random_hs = np.random.randint(0, height_range + 1)
        random_wd = self.img_width + random_ws
        random_hd = self.img_height + random_hs
        img = img[random_hs:random_hd, random_ws:random_wd]
        if label is not None:
            label = label[random_hs:random_hd, random_ws:random_wd]
        return img, label

# Scale by a fixed factor; ``ratio`` is a float.
class Scale(object):
    """Resize an image/label pair by a constant ratio.

    Args:
        ratio: Multiplier applied to both height and width.
        interpolation: OpenCV interpolation flag used for the image
            (default 1, i.e. ``cv2.INTER_LINEAR``).
    """

    def __init__(self, ratio, interpolation=1):
        self.ratio = ratio
        self.interpolation = interpolation

    def __call__(self, img, label=None):
        """Return the scaled image and, when given, the scaled label."""
        # Array shape is (height, width, ...); cv2.resize wants (width, height).
        height, width = img.shape[:2]
        dsize = (int(width * self.ratio), int(height * self.ratio))

        # ``interpolation`` must be passed by keyword: the third positional
        # parameter of cv2.resize is ``dst``, not the interpolation flag.
        img = cv2.resize(img, dsize, interpolation=self.interpolation)
        if label is not None:
            # Nearest-neighbour keeps label values valid class ids.
            label = cv2.resize(label, dsize, interpolation=cv2.INTER_NEAREST)
        return img, label

# Random scale; ``range_data`` is a single number (fixed ratio) or a
# tuple/list ``(low, high)`` to sample the ratio from.
class RandomScale(object):
    """Resize an image/label pair by a ratio drawn anew on every call.

    Args:
        range_data: A number for a fixed ratio, otherwise ``(low, high)``
            bounds for ``random.uniform``.
        interpolation: OpenCV interpolation flag used for the image
            (default 1, i.e. ``cv2.INTER_LINEAR``).

    Attributes:
        ratio: The most recently sampled ratio.
    """

    def __init__(self, range_data, interpolation=1):
        if isinstance(range_data, (int, float)):
            self.ratio_range = (range_data, range_data)
        else:
            self.ratio_range = (range_data[0], range_data[1])
        self.interpolation = interpolation
        # Kept so that ``ratio`` is available right after construction.
        self.ratio = self._sample_ratio()

    def _sample_ratio(self):
        """Draw a ratio from the configured range (fixed if low == high)."""
        low, high = self.ratio_range
        if low == high:
            return low
        return random.uniform(low, high)

    def __call__(self, img, label=None):
        """Return the scaled image and, when given, the scaled label."""
        # Sample per call; sampling only in __init__ would make every call of
        # one instance use the same ratio.
        self.ratio = self._sample_ratio()

        # Array shape is (height, width, ...); cv2.resize wants (width, height).
        height, width = img.shape[:2]
        dsize = (int(width * self.ratio), int(height * self.ratio))

        # ``interpolation`` must be passed by keyword: the third positional
        # parameter of cv2.resize is ``dst``, not the interpolation flag.
        img = cv2.resize(img, dsize, interpolation=self.interpolation)
        if label is not None:
            # Nearest-neighbour keeps label values valid class ids.
            label = cv2.resize(label, dsize, interpolation=cv2.INTER_NEAREST)
        return img, label

def main(image_path=None):
    """Run the augmentation demo and write the results as PNG files.

    Args:
        image_path: Path of the input image. When omitted, the first
            command-line argument is used.

    NOTE(review): the statement that loaded ``image`` was missing from this
    listing, so the source of the image is an assumption -- confirm.
    """
    import sys

    if image_path is None:
        if len(sys.argv) < 2:
            raise SystemExit('usage: python <script> <image_path>')
        image_path = sys.argv[1]
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if image is None:
        raise IOError('cannot read image: {}'.format(image_path))

    # crop_size
    img_1 = RandomCrop((300, 200))(image)[0]
    cv2.imwrite('RandomCrop.png', img_1)

    # Transform: RandomScale, RandomFlip, Pad, RandomCrop
    # NOTE(review): no padding step is applied here although the heading
    # mentions Pad; a crop larger than the scaled image will fail -- confirm.
    img_2 = RandomScale((0.5, 3))(image)[0]
    img_2 = RandomFlip(0)(img_2)[0]
    img_2 = RandomCrop((400, 300))(img_2)[0]
    cv2.imwrite('Transfoimgrm.png', img_2)

    for i in range(10):
        # call transform
        img = RandomScale((0.5, 3))(image)[0]
        img = RandomFlip(0)(img)[0]
        img = RandomCrop((400, 300))(img)[0]
        # save image
        cv2.imwrite('Transform_{}.png'.format(i+1), img)
        print('Transform_{}.png'.format(i+1) + ' has been saved to disk')


if __name__ == "__main__":
    main()

## 网(深度学习网络搭建)

import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import Conv2D,Pool2D  #TODO 导入需要的层
np.set_printoptions(precision=2)  #打印精度

class BasicModel(fluid.dygraph.Layer):
    """Minimal segmentation network: max-pool -> 3x3 conv -> upsample.

    BasicModel contains:
      1. pool:     4x4 max pool op, with stride 4
      2. conv:     3x3 kernel size, takes the RGB image as input and outputs
                   num_classes channels; the feature map size is preserved
      3. upsample: upsample back to the input size

    The output has the same HxW as the input, with C = num_classes.

    As in PyTorch, the layers are created in ``__init__`` and chained
    together in ``forward``.

    Args:
        num_classes: Number of output channels (one per class).
    """

    def __init__(self, num_classes=59):
        super(BasicModel, self).__init__()

        self.pool1 = Pool2D(pool_size=4, pool_type='max', pool_stride=4)
        # forward() calls self.conv2, so it has to be created here.
        # padding=1 keeps HxW unchanged for a 3x3 kernel with stride 1.
        self.conv2 = Conv2D(num_channels=3,
                            num_filters=num_classes,
                            filter_size=3,
                            padding=1)

    def forward(self, inputs):
        """Return per-class scores with the same HxW as ``inputs``."""
        x = self.pool1(inputs)
        x = self.conv2(x)
        # assumes inputs are laid out as (N, C, H, W) -- TODO confirm
        x = fluid.layers.interpolate(x, out_shape=(inputs.shape[2], inputs.shape[3]))
        return x

def main():
    """Run BasicModel once on a random (1, 3, 8, 8) input and print shapes."""
    # NOTE(review): the statement defining ``place`` was missing from this
    # listing; CPU is used here as the safe choice -- confirm.
    place = fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        model = BasicModel(num_classes=59)
        model.eval()
        input_data = np.random.uniform(-1, 1, [1, 3, 8, 8]).astype('float32')
        print('Input data shape: ', input_data.shape)
        # Wrap the numpy array as a dygraph variable before the forward pass.
        input_data = fluid.dygraph.to_variable(input_data)
        output_data = model(input_data)
        output_data = output_data.numpy()
        print('Output data shape: ', output_data.shape)


if __name__ == "__main__":
    main()

## 训(网络训练测试)

### 计算loss

def Basic_SegLoss(preds, labels, ignore_index=255):
    """Mean softmax cross-entropy over the pixels that are not ignored.

    Args:
        preds: Network output, unpacked below as (n, c, h, w).
        labels: Class-id labels; presumably shaped (n, h, w, 1) to line up
            with the transposed predictions -- TODO confirm against caller.
        ignore_index: Label value excluded from the loss.

    Returns:
        The loss averaged over the non-ignored pixels.
    """
    # Small constant so an all-ignored batch does not divide by zero.
    eps = 1e-8

    # Unpacking doubles as a check that preds is 4-D.
    n, c, h, w = preds.shape

    # Move the class axis last, where softmax_with_cross_entropy expects it.
    preds = fluid.layers.transpose(preds, [0, 2, 3, 1])

    # 1.0 where the pixel counts towards the loss, 0.0 where it is ignored.
    mask = labels != ignore_index
    mask = fluid.layers.cast(mask, 'float32')

    loss = fluid.layers.softmax_with_cross_entropy(preds, labels,
                                                   ignore_index=ignore_index)
    loss = loss * mask
    # mean(loss) / mean(mask) == sum(loss) / number of counted pixels.
    avg_loss = fluid.layers.mean(loss) / (fluid.layers.mean(mask) + eps)

    return avg_loss

### 定义单次训练流程

def train(dataloader, model, criterion, optimizer, epoch, total_batch):
    """Train ``model`` for one epoch and return the average loss.

    Args:
        dataloader: Iterable yielding batches indexable as
            ``(image, label)``.
        model: Network being trained.
        criterion: Callable ``criterion(pred, label)`` returning the loss.
        optimizer: Optimizer whose ``minimize(loss)`` updates the weights.
        epoch: Current epoch number (used for logging only).
        total_batch: Number of batches per epoch (used for logging only).

    Returns:
        The running average loss reported by the AverageMeter.
    """
    model.train()
    train_loss_meter = AverageMeter()
    for batch_id, data in enumerate(dataloader):
        image = data[0]
        label = data[1]

        # presumably NHWC -> NCHW for the convolution layers; verify
        # against the data pipeline
        image = fluid.layers.transpose(image, (0, 3, 1, 2))
        pred = model(image)
        loss = criterion(pred, label)

        loss.backward()
        optimizer.minimize(loss)
        # Dygraph accumulates gradients; reset them before the next batch.
        model.clear_gradients()

        n = image.shape[0]
        train_loss_meter.update(loss.numpy()[0], n)
        print(f"Epoch[{epoch:03d}/{args.num_epochs:03d}], " +
              f"Step[{batch_id:04d}/{total_batch:04d}], " +
              f"Average Loss: {train_loss_meter.avg:.4f}")

    return train_loss_meter.avg

### 串成完整的训练脚本

import os
import numpy as np
import argparse
from utils import AverageMeter
from basic_model import BasicModel
from basic_seg_loss import Basic_SegLoss
from basic_data_preprocessing import TrainAugmentation

parser = argparse.ArgumentParser()
# NOTE(review): the add_argument calls were missing from this listing. One
# option is declared for every ``args.*`` attribute this script reads; the
# default values are placeholders -- confirm before use.
parser.add_argument('--net', type=str, default='basic')
parser.add_argument('--lr', type=float, default=0.001)
parser.add_argument('--num_epochs', type=int, default=10)
parser.add_argument('--batch_size', type=int, default=4)
parser.add_argument('--image_folder', type=str, default='./dummy_data')
parser.add_argument('--image_list_file', type=str, default='./dummy_data/list.txt')
parser.add_argument('--checkpoint_folder', type=str, default='./output')
parser.add_argument('--save_freq', type=int, default=2)

args = parser.parse_args()


def main():
    """Build the dataloader, model and optimizer, then train and checkpoint."""
    # Step 0: preparation
    # NOTE(review): the statement defining ``place`` was missing from this
    # listing; CPU is used here as the safe choice -- confirm.
    place = fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        # Step 1: Define training dataloader
        basic_augmentation = TrainAugmentation(image_size=256)
        # NOTE(review): ``BasicDataLoader`` is the assumed name of the
        # sample-generator class; its import/definition is not visible in
        # this script section -- confirm.
        basic_dataloader = BasicDataLoader(image_folder=args.image_folder,
                                           image_list_file=args.image_list_file,
                                           transform=basic_augmentation,
                                           shuffle=True)
        # NOTE(review): ``capacity`` was not visible in the listing.
        train_dataloader = fluid.io.DataLoader.from_generator(capacity=10,
                                                              use_multiprocess=True)
        train_dataloader.set_sample_generator(basic_dataloader,
                                              batch_size=args.batch_size,
                                              places=place)
        # Only used for the progress message printed by train().
        total_batch = len(basic_dataloader) // args.batch_size

        # Step 2: Create model
        if args.net == "basic":
            model = BasicModel()
        else:
            raise NotImplementedError(f"args.net: {args.net} is not Supported!")

        # Step 3: Define criterion and optimizer
        criterion = Basic_SegLoss

        # create optimizer
        # NOTE(review): the optimizer class was not visible in the listing;
        # Adam is an assumption -- confirm.
        optimizer = fluid.optimizer.AdamOptimizer(learning_rate=args.lr,
                                                  parameter_list=model.parameters())
        # Step 4: Training
        for epoch in range(1, args.num_epochs+1):
            train_loss = train(train_dataloader,
                               model,
                               criterion,
                               optimizer,
                               epoch,
                               total_batch)
            print(f"----- Epoch[{epoch}/{args.num_epochs}] Train Loss: {train_loss:.4f}")

            if epoch % args.save_freq == 0 or epoch == args.num_epochs:
                model_path = os.path.join(args.checkpoint_folder, f"{args.net}-Epoch-{epoch}-Loss-{train_loss}")

                # save model and optimizer states; both calls share one path
                # prefix, the messages below name the two resulting files
                model_dict = model.state_dict()
                fluid.save_dygraph(model_dict, model_path)
                optimizer_dict = optimizer.state_dict()
                fluid.save_dygraph(optimizer_dict, model_path)
                print(f'----- Save model: {model_path}.pdparams')
                print(f'----- Save optimizer: {model_path}.pdopt')


if __name__ == "__main__":
    main()

04-23 2240
07-22 321
09-11 2123
01-18 2014

### “相关推荐”对你有帮助么？

• 非常没帮助
• 没帮助
• 一般
• 有帮助
• 非常有帮助

¥2 ¥4 ¥6 ¥10 ¥20

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、C币套餐、付费专栏及课程。