深度学习中常见的预处理操作
import os
import random

import cv2
import matplotlib.pyplot as plt
import numpy as np
# 连接各个transform操作
class Compose(object):
    """Chain several transforms and apply them one after another.

    Every transform must be a callable that takes ``(image, label)`` and
    returns an ``(image, label)`` pair.
    """

    def __init__(self, transforms):
        """Remember the transforms to apply.

        Args:
            transforms: Iterable of callables with the signature
                ``t(image, label) -> (image, label)``.
        """
        self.transforms = transforms

    def __call__(self, image, label=None):
        """Feed ``image`` and ``label`` through each transform in order.

        Args:
            image: Input handed to the first transform.
            label: Optional label handed along with the image.

        Returns:
            The ``(image, label)`` pair produced by the last transform, or
            the inputs unchanged when there are no transforms.
        """
        for transform in self.transforms:
            image, label = transform(image, label)
        return image, label
# 标准化
class Normalize(object):
    """Rescale an image and standardise it with a mean and a std."""

    def __init__(self, mean_val, std_val, val_scale=1):
        """Store the statistics used for normalisation.

        Args:
            mean_val: Scalar or per-channel mean subtracted from the image.
            std_val: Scalar or per-channel std the image is divided by.
            val_scale: Pass 1 when ``mean_val``/``std_val`` are expressed in
                the (0, 1) range; the image is then multiplied by 1/255
                first. Pass any other value when they are expressed in the
                (0, 255) range; the image is then left unscaled.
        """
        self.mean = np.array(mean_val, dtype=np.float32)
        self.std = np.array(std_val, dtype=np.float32)
        self.val_scale = 1 / 255.0 if val_scale == 1 else 1

    def __call__(self, image, label=None):
        """Return the normalised float32 image; the label passes through.

        Args:
            image: Array to normalise; it is cast to float32 first.
            label: Optional label, returned untouched.

        Returns:
            ``(image, label)`` with ``image = (image * val_scale - mean) / std``.
        """
        image = image.astype(np.float32)
        image = image * self.val_scale
        image = image - self.mean
        # Multiply by the reciprocal, exactly as the original code did, so
        # results stay bit-identical.
        image = image * (1 / self.std)
        return image, label
# 转换数据格式
class ConvertDataType(object):
    """Cast the image to float32 and the label (if any) to int64."""

    def __call__(self, image, label=None):
        """Return ``(image, label)`` with converted dtypes.

        Args:
            image: Array exposing ``astype``; converted to ``np.float32``.
            label: Optional array exposing ``astype``; converted to
                ``np.int64``. ``None`` is passed through unchanged.
        """
        if label is not None:
            label = label.astype(np.int64)
        return image.astype(np.float32), label
# 按照给定大小填充图片
class Pad(object):
    """Pad an image (and its label) up to a minimum size, keeping it centred."""

    def __init__(self, size, ignore_label=255, mean_val=0, val_scale=1):
        """Configure the target size and the fill values.

        Args:
            size: Minimum output size. Either an int (used for both height
                and width) or a ``(width, height)`` pair, following the
                ordering the other transforms in this file use.
            ignore_label: Constant used to fill the padded label border.
            mean_val: Scalar or per-channel fill colour for the image border.
            val_scale: Pass 1 when ``mean_val`` is in the (0, 1) range; it is
                then multiplied by 255. Pass any other value (e.g. 255) when
                ``mean_val`` is already in the (0, 255) range.
        """
        factor = 255 if val_scale == 1 else 1
        self.size = size
        self.ignore_label = ignore_label
        # Bring the fill colour onto the 0-255 scale and truncate to int.
        if isinstance(mean_val, (tuple, list)):
            self.mean_val = [int(x * factor) for x in mean_val]
        else:
            self.mean_val = int(mean_val * factor)

    def __call__(self, image, label=None):
        """Pad ``image``/``label`` so both dimensions reach the target size.

        Inputs that are already large enough are returned unchanged.

        Args:
            image: Array whose first two axes are height and width.
            label: Optional array padded with ``ignore_label``.

        Returns:
            The ``(image, label)`` pair after padding.
        """
        if isinstance(self.size, (tuple, list)):
            target_w, target_h = self.size[0], self.size[1]
        else:
            target_w = target_h = self.size

        # Only the first two axes matter, so 2-D images work as well.
        h, w = image.shape[:2]
        pad_h = max(target_h - h, 0)
        pad_w = max(target_w - w, 0)
        if pad_h == 0 and pad_w == 0:
            return image, label

        # Split the padding between both sides; the odd pixel goes to the
        # bottom/right.
        pad_top = int(pad_h / 2)
        pad_left = int(pad_w / 2)
        pad_bottom = pad_h - pad_top
        pad_right = pad_w - pad_left

        image = cv2.copyMakeBorder(image,
                                   top=pad_top,
                                   left=pad_left,
                                   bottom=pad_bottom,
                                   right=pad_right,
                                   borderType=cv2.BORDER_CONSTANT,
                                   value=self.mean_val)
        if label is not None:
            label = cv2.copyMakeBorder(label,
                                       top=pad_top,
                                       left=pad_left,
                                       bottom=pad_bottom,
                                       right=pad_right,
                                       borderType=cv2.BORDER_CONSTANT,
                                       value=self.ignore_label)
        return image, label
# 中心裁剪
class CenterCrop(object):
    """Crop the central region of an image and its label."""

    def __init__(self, size):
        """Store the crop size.

        Args:
            size: An int (square crop) or a ``(width, height)`` pair.
        """
        assert isinstance(size, (int, tuple, list)), 'not support size type'
        if isinstance(size, int):
            self.size = (size, size)
        else:
            self.size = tuple(size)

    def __call__(self, image, label=None):
        """Return the centred crop of ``image`` and ``label``.

        When the crop is larger than the image in either dimension, a
        message is printed and the inputs are returned unchanged.

        Args:
            image: Array whose first two axes are height and width.
            label: Optional array cropped with the same window.

        Returns:
            The ``(image, label)`` pair after cropping.
        """
        crop_w, crop_h = self.size[0], self.size[1]
        h, w = image.shape[:2]

        # Slicing never raises for out-of-range bounds, so an oversized crop
        # has to be detected explicitly; a negative start index would
        # otherwise silently select the wrong region.
        if crop_h > h or crop_w > w:
            print('CROP OUT OF IMAGE, RETURN ORIGIN IMAGE!')
            return image, label

        start_h = (h - crop_h) // 2
        start_w = (w - crop_w) // 2
        image = image[start_h:start_h + crop_h, start_w:start_w + crop_w]
        if label is not None:
            label = label[start_h:start_h + crop_h, start_w:start_w + crop_w]
        return image, label
# 调整大小
class Resize(object):
    """Resize an image and its label to a fixed size."""

    def __init__(self, size=(256, 256)):
        """Store the target size.

        Args:
            size: An int (square output) or a two-element tuple/list whose
                items are passed, in order, as the ``dsize`` argument of
                ``cv2.resize``.
        """
        assert isinstance(size, (int, tuple, list)), 'not support size type'
        if isinstance(size, int):
            self.size = (size, size)
        else:
            self.size = tuple(size)

    def __call__(self, image, label=None):
        """Resize ``image`` (bilinear) and ``label`` (nearest neighbour).

        Nearest-neighbour interpolation is used for the label so that no
        new label values are invented by blending.

        Args:
            image: Image array accepted by ``cv2.resize``.
            label: Optional label array; ``None`` is passed through.

        Returns:
            The resized ``(image, label)`` pair.
        """
        target = (self.size[0], self.size[1])
        image = cv2.resize(image, target, interpolation=cv2.INTER_LINEAR)
        # The other transforms tolerate a missing label, so this one must too.
        if label is not None:
            label = cv2.resize(label, target, interpolation=cv2.INTER_NEAREST)
        return image, label
# 随机水平或垂直翻转
class RandomFlip(object):
    """Flip an image and its label, randomly choosing the flip direction.

    The input is always flipped: vertically when the random draw is at most
    ``prob``, horizontally otherwise.
    """

    def __init__(self, prob=0.5):
        """Store the threshold that selects the flip direction.

        Args:
            prob: Threshold compared against ``random.random()``; draws
                above it flip horizontally, the rest flip vertically.
        """
        self.prob = prob

    def __call__(self, image, label=None):
        """Return the flipped ``(image, label)`` pair.

        Args:
            image: Array whose first two axes are height and width.
            label: Optional array flipped the same way as the image.

        Returns:
            Reversed views of the inputs; ``label`` stays ``None`` when it
            was not supplied.
        """
        if random.random() > self.prob:
            # Horizontal flip: reverse the width axis.
            image = image[:, ::-1]
            if label is not None:
                label = label[:, ::-1]
        else:
            # Vertical flip: reverse the height axis.
            image = image[::-1]
            if label is not None:
                label = label[::-1]
        return image, label
# 随机裁剪
class RandomCrop(object):
    """Crop a randomly positioned window from an image and its label."""

    def __init__(self, size=(256, 256)):
        """Store the crop size.

        Args:
            size: An int (square crop) or a ``(width, height)`` pair.
        """
        assert isinstance(size, (int, tuple, list)), 'not support size type'
        if isinstance(size, int):
            self.size = (size, size)
        else:
            self.size = tuple(size)

    def __call__(self, image, label=None):
        """Return a random crop of ``image`` and the matching label crop.

        Args:
            image: Array whose first two axes are height and width.
            label: Optional array cropped with the same window.

        Returns:
            The cropped ``(image, label)`` pair.

        Raises:
            AssertionError: If the crop is larger than the image.
        """
        crop_w, crop_h = self.size[0], self.size[1]
        h, w = image.shape[:2]
        max_top = h - crop_h
        max_left = w - crop_w
        # Validate before sampling so an oversized crop is rejected every
        # time instead of depending on the random draw.
        assert max_top >= 0, "crop size > image height !"
        assert max_left >= 0, "crop size > image width !"

        # randint excludes its upper bound, hence the +1 to allow the
        # bottom-most / right-most window.
        top = int(np.random.randint(0, max_top + 1))
        left = int(np.random.randint(0, max_left + 1))

        image = image[top:top + crop_h, left:left + crop_w]
        if label is not None:
            label = label[top:top + crop_h, left:left + crop_w]
        return image, label
# 图片缩放
class Scale(object):
    """Resize an image and its label by a scale factor."""

    def __call__(self, image, label=None, scale=1.0):
        """Scale ``image`` (bilinear) and ``label`` (nearest neighbour).

        Args:
            image: Three-axis array; the first two axes are height and width.
            label: Optional label array; ``None`` is passed through.
            scale: A single factor applied to both dimensions, or a
                two-element list/tuple ``(width_factor, height_factor)``.

        Returns:
            The scaled ``(image, label)`` pair. The target dimensions are
            truncated to integers.
        """
        if not isinstance(scale, (list, tuple)):
            scale = (scale, scale)
        h, w, _ = image.shape
        # cv2.resize takes the target size as (width, height).
        target = (int(w * scale[0]), int(h * scale[1]))
        image = cv2.resize(image, target, interpolation=cv2.INTER_LINEAR)
        if label is not None:
            label = cv2.resize(label, target, interpolation=cv2.INTER_NEAREST)
        return image, label
# 随机缩放
class RandomScale(object):
    """Scale an image and its label by a randomly chosen factor."""

    def __init__(self, min_scale=0.5, max_scale=2.0, step=0.25):
        """Configure the range the scale factor is drawn from.

        Args:
            min_scale: Smallest scale factor.
            max_scale: Largest scale factor.
            step: Spacing of the candidate factors between ``min_scale`` and
                ``max_scale``. Pass 0 to draw from the continuous range.
        """
        self.min_scale = min_scale
        self.max_scale = max_scale
        self.step = step
        self.scale = Scale()

    def __call__(self, image, label=None):
        """Pick a scale factor and apply it to ``image`` and ``label``.

        The chosen factor is also stored in ``self.random_scale``.

        Args:
            image: Image forwarded to the underlying scale transform.
            label: Optional label forwarded along with the image.

        Returns:
            The scaled ``(image, label)`` pair.
        """
        if self.step == 0:
            self.random_scale = np.random.uniform(self.min_scale,
                                                  self.max_scale,
                                                  1)[0]
        else:
            # The small epsilon keeps floating-point error from dropping the
            # last step (e.g. (0.7 - 0.4) / 0.1 evaluates just below 3).
            num_steps = int(
                (self.max_scale - self.min_scale) / self.step + 1 + 1e-9)
            scale_factors = np.linspace(self.min_scale,
                                        self.max_scale,
                                        num_steps)
            self.random_scale = np.random.choice(scale_factors)
        image, label = self.scale(image, label, self.random_scale)
        return image, label
运行:
# Demo: load one image/label pair, apply the augmentation pipeline ten times
# and display each result.
current_path = os.path.dirname(os.path.abspath(__file__))

image = cv2.imread('./dummy_data/JPEGImages/2008_000064.jpg', 1)
label = cv2.imread('./dummy_data/GroundTruth_trainval_png/2008_000064.png', 0)
# cv2.imread returns None instead of raising when a file cannot be read, so
# fail here with a clear message rather than deep inside a transform.
if image is None:
    raise FileNotFoundError(
        'cannot read ./dummy_data/JPEGImages/2008_000064.jpg')
if label is None:
    raise FileNotFoundError(
        'cannot read ./dummy_data/GroundTruth_trainval_png/2008_000064.png')

# Side length of the square crop produced by the pipeline.
crop_size = 256

# Pipeline: RandomScale, RandomFlip, Pad, RandomCrop, then dtype conversion
# and rescaling of the pixel values.
transform = Compose([RandomScale(),
                     RandomFlip(),
                     Pad(crop_size, mean_val=[0.485, 0.456, 0.406]),
                     RandomCrop(crop_size),
                     ConvertDataType(),
                     Normalize(0, 1)])

save_path = os.path.join(current_path, 'transform')
# exist_ok avoids the race between checking for the directory and creating it.
os.makedirs(save_path, exist_ok=True)

for i in range(10):
    transformed_img, transformed_label = transform(image, label)
    # OpenCV loads colour images in BGR order while matplotlib expects RGB,
    # so reverse the channel axis for display.
    plt.imshow(transformed_img[:, :, ::-1])
    plt.show()
结果: