第一次比较完整地参加在线的训练营,也相对认真地实现了课程作业。通过这次训练营,比较直观地了解了图像分割网络的基本原理,并对几个经典的网络做了代码实现。
其中,第一次作业里图像数据增强部分的代码实现给我留下的印象比较深:它涉及到一些图像处理和 NumPy 数组操作的技巧,全部由自己手工实现,现贴出来以供参考。
import cv2
import numpy as np
class Compose(object):
    """Chain several transforms into one callable.

    Every transform must accept ``(image, label)`` and return an
    ``(image, label)`` pair; the pair returned by one transform is fed to
    the next one.
    """

    def __init__(self, transforms):
        """Remember the transforms to apply, in order.

        Args:
            transforms: iterable of callables taking ``(image, label)``.
        """
        # Materialise the iterable so that a one-shot iterator (for example
        # a generator) is not silently exhausted after the first call.
        self.transforms = list(transforms)

    def __call__(self, image, label=None):
        """Run every transform in sequence and return ``(image, label)``."""
        for transform in self.transforms:
            image, label = transform(image, label)
        return image, label
class Normalize(object):
    """Convert an image to float32 and normalise it as ``(x - mean) / std``."""

    def __init__(self, mean_val, std_val, val_scale=1):
        """Store the normalisation statistics.

        Args:
            mean_val: scalar or per-channel mean.
            std_val: scalar or per-channel standard deviation.
            val_scale: pass 1 when ``mean_val``/``std_val`` are expressed in
                the 0-1 range (pixels are then multiplied by 1/255 before
                normalising); pass any other value when they are expressed
                in the 0-255 range (pixels are left unscaled).

        Raises:
            ValueError: if any entry of ``std_val`` is zero.
        """
        self.mean = np.array(mean_val, dtype=np.float32)
        self.std = np.array(std_val, dtype=np.float32)
        # A zero std would silently turn the whole channel into inf/NaN.
        if np.any(self.std == 0):
            raise ValueError("std_val must not contain zeros")
        self.val_scale = 1 / 255.0 if val_scale == 1 else 1

    def __call__(self, image, label=None):
        """Return the normalised float32 image and the untouched label."""
        image = image.astype(np.float32)
        image = image * self.val_scale
        image = image - self.mean
        image = image * (1 / self.std)
        return image, label
class ConvertDataType(object):
    """Cast the image to float32 and the label (when given) to int64."""

    def __call__(self, image, label=None):
        """Return ``(image as float32, label as int64 or None)``."""
        if label is not None:
            label = label.astype(np.int64)
        return image.astype(np.float32), label
class Pad(object):
    """Pad image and label symmetrically up to a square of side ``size``.

    The image border is filled with ``mean_val`` and the label border with
    ``ignore_label``. Inputs that are already at least ``size`` along an axis
    are not padded along that axis.
    """

    def __init__(self, size, ignore_label=255, mean_val=0, val_scale=1):
        """Store the padding configuration.

        Args:
            size: minimum height and width of the output.
            ignore_label: fill value for the padded label border.
            mean_val: scalar or per-channel fill value for the image border.
            val_scale: pass 1 when ``mean_val`` is expressed in the 0-1 range
                (it is then multiplied by 255); pass any other value when it
                is already expressed in the 0-255 range.
        """
        factor = 255 if val_scale == 1 else 1
        self.size = size
        self.ignore_label = ignore_label
        # Bring the fill value to the 0-255 integer range.
        if isinstance(mean_val, (tuple, list, np.ndarray)):
            self.mean_val = [int(x * factor) for x in mean_val]
        else:
            self.mean_val = int(mean_val * factor)

    def __call__(self, image, label=None):
        """Return the padded ``(image, label)`` pair."""
        # Only the first two axes matter, so 2-D (single channel) images
        # are accepted as well as H x W x C ones.
        h, w = image.shape[:2]
        pad_h = max(self.size - h, 0)
        pad_w = max(self.size - w, 0)
        if pad_h == 0 and pad_w == 0:
            return image, label

        # Split the padding evenly; the odd pixel goes to bottom/right.
        top = pad_h // 2
        left = pad_w // 2
        bottom = pad_h - top
        right = pad_w - left

        image = cv2.copyMakeBorder(image,
                                   top=top,
                                   left=left,
                                   bottom=bottom,
                                   right=right,
                                   borderType=cv2.BORDER_CONSTANT,
                                   value=self.mean_val)
        if label is not None:
            label = cv2.copyMakeBorder(label,
                                       top=top,
                                       left=left,
                                       bottom=bottom,
                                       right=right,
                                       borderType=cv2.BORDER_CONSTANT,
                                       value=self.ignore_label)
        return image, label
# TODO
class CenterCrop(object):
    """Crop a ``size`` x ``size`` window from the centre of image and label."""

    def __init__(self, size):
        """Store the side length of the square crop window."""
        self.size = size

    def __call__(self, image, label=None):
        """Return the centre crop of ``image`` and, when given, ``label``.

        An axis shorter than ``size`` is returned in full rather than being
        sliced with a negative offset.
        """
        h, w = image.shape[:2]
        # Clamp at 0: a negative start would wrap around and select an
        # unrelated strip near the far edge.
        top = max((h - self.size) // 2, 0)
        left = max((w - self.size) // 2, 0)
        rows = slice(top, top + self.size)
        cols = slice(left, left + self.size)

        # Index only the two spatial axes so that both H x W and H x W x C
        # arrays are supported.
        image = image[rows, cols]
        if label is not None:
            label = label[rows, cols]
        return image, label
# TODO
class Resize(object):
    """Namespace for the resize helper shared by the scaling transforms."""

    @staticmethod
    def resize(srcimg, height, width, interpolation=None):
        """Resize ``srcimg`` to ``height`` rows by ``width`` columns.

        Args:
            srcimg: H x W or H x W x C array.
            height: number of rows of the result.
            width: number of columns of the result.
            interpolation: OpenCV interpolation flag. Defaults to
                ``cv2.INTER_NEAREST``, which never produces values absent
                from the input and is therefore safe for label maps. Pass
                e.g. ``cv2.INTER_LINEAR`` for smoother image resampling.

        Returns:
            The resized array.
        """
        if interpolation is None:
            interpolation = cv2.INTER_NEAREST
        # cv2.resize expects dsize as (width, height), i.e. (cols, rows).
        return cv2.resize(srcimg, (width, height), interpolation=interpolation)
# TODO
class RandomFlip(object):
    """Mirror image and label left-to-right with probability 0.5."""

    def __call__(self, image, label=None):
        """Return ``(image, label)``, both flipped or both untouched."""
        # One draw decides for both arrays so that they stay aligned.
        if np.random.randint(2) == 1:
            image = np.fliplr(image)
            if label is not None:
                label = np.fliplr(label)
        return image, label
# TODO
class RandomCrop(object):
    """Crop the same random ``size`` x ``size`` window from image and label."""

    def __init__(self, size=256):
        """Store the side length of the square crop window."""
        self.size = size

    def __call__(self, image, label=None):
        """Return a random crop of ``image`` and, when given, ``label``.

        Raises:
            ValueError: if the image is smaller than ``size`` along either
                spatial axis.
        """
        h, w = image.shape[:2]
        if h < self.size or w < self.size:
            raise ValueError(
                "cannot crop {0}x{0} from a {1}x{2} image".format(self.size, h, w))

        # randint's upper bound is exclusive, hence the +1: the last valid
        # offset is (dim - size), and an exact-size input must yield offset 0.
        top = np.random.randint(0, h - self.size + 1)
        left = np.random.randint(0, w - self.size + 1)
        rows = slice(top, top + self.size)
        cols = slice(left, left + self.size)

        # Index only the two spatial axes so H x W and H x W x C both work.
        image = image[rows, cols]
        if label is not None:
            label = label[rows, cols]
        return image, label
# TODO
class Scale(object):
    """Resize by a fixed factor, then centre-crop back to the input size."""

    def __init__(self, scale):
        """Store the scale factor applied to both height and width."""
        self.scale = scale

    def __call__(self, image, label=None):
        """Return the rescaled ``(image, label)`` pair.

        The result is centre-cropped to the original ``(height, width)``.
        When the rescaled array is smaller than the original along an axis
        (``scale`` < 1) that axis is returned in full.
        """
        h, w = image.shape[:2]
        h_new, w_new = int(h * self.scale), int(w * self.scale)
        image = Resize.resize(image, h_new, w_new)
        if label is not None:
            label = Resize.resize(label, h_new, w_new)

        # Derive the offsets from the array actually returned by the resize,
        # and crop to (h, w) rather than to a square so that non-square
        # inputs keep their aspect ratio.
        resized_h, resized_w = image.shape[:2]
        top = max((resized_h - h) // 2, 0)
        left = max((resized_w - w) // 2, 0)
        rows = slice(top, top + h)
        cols = slice(left, left + w)

        image = image[rows, cols]
        if label is not None:
            label = label[rows, cols]
        return image, label
# TODO
class RandomScale(object):
    """Apply ``Scale`` with a factor drawn uniformly from [1, scale_max)."""

    def __init__(self, scale_max):
        """Store the upper bound of the random scale factor."""
        self.scale_max = scale_max

    def __call__(self, image, label):
        """Return ``(image, label)`` rescaled by one random factor."""
        scale = 1.0 + np.random.rand() * (self.scale_max - 1.0)
        # Delegate to Scale instead of repeating its resize-and-crop logic.
        return Scale(scale)(image, label)
def main():
    """Run the augmentation pipeline ten times on one sample and save the results.

    Raises:
        FileNotFoundError: if the sample image or label cannot be read.
    """
    import os

    image_path = './work/dummy_data/JPEGImages/2008_000064.jpg'
    label_path = './work/dummy_data/GroundTruth_trainval_png/2008_000064.png'
    result_dir = './work/transformed/'

    image = cv2.imread(image_path)
    label = cv2.imread(label_path)
    # cv2.imread signals failure by returning None instead of raising.
    if image is None:
        raise FileNotFoundError(image_path)
    if label is None:
        raise FileNotFoundError(label_path)

    # cv2.imwrite does not create missing directories.
    os.makedirs(result_dir, exist_ok=True)

    # Bring the sample to a fixed size before augmenting.
    image = Resize.resize(image, 480, 640)
    label = Resize.resize(label, 480, 640)

    compose = Compose([
        RandomCrop(256),
        RandomScale(2.0),
        RandomFlip(),
        Pad(256),
    ])

    for i in range(10):
        dstimg, dstlab = compose(image, label)
        cv2.imwrite(os.path.join(result_dir, str(i) + '_image.jpg'), dstimg)
        # Labels are written as PNG: JPEG compression is lossy and would
        # alter the class ids stored in the pixels.
        cv2.imwrite(os.path.join(result_dir, str(i) + '_label.png'), dstlab)


if __name__ == "__main__":
    main()
课程链接:https://aistudio.baidu.com/aistudio/education/group/info/1767