包括左右翻转,随机缩放,随机改变H,S,V等等
def random_flip(self, img, boxes):
if random.random() < 0.5:
return img, boxes
h, w, _ = img.shape
img = np.fliplr(img)
#在左右方向上翻转每行的元素,似乎都是作用在y轴,(第二个维度上)
x1, x2 = boxes[:, 0], boxes[:, 2]
x1_new = w - x2
x2_new = w - x1
boxes[:, 0], boxes[:, 2] = x1_new, x2_new
#随机左右翻转图片,框也跟着变化
return img, boxes
def random_scale(self, img, boxes):
if random.random() < 0.5:
return img, boxes
scale = random.uniform(0.8, 1.2)#随机生成一个实数在x,y之间
h, w, _ = img.shape
img = cv2.resize(img, dsize=(int(w * scale), h), interpolation=cv2.INTER_LINEAR)
scale_tensor = torch.FloatTensor([[scale, 1.0, scale, 1.0]]).expand_as(boxes)
boxes = boxes * scale_tensor
#随机缩放图片和框
return img, boxes
def random_blur(self, bgr):
if random.random() < 0.5:
return bgr
ksize = random.choice([2, 3, 4, 5])
bgr = cv2.blur(bgr, (ksize, ksize))#均值滤波
return bgr
def random_brightness(self, bgr):
if random.random() < 0.5:
return bgr
hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)
adjust = random.uniform(0.5, 1.5)
v = v * adjust
v = np.clip(v, 0, 255).astype(hsv.dtype)
hsv = cv2.merge((h, s, v))
bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
return bgr
def random_hue(self, bgr):
if random.random() < 0.5:
return bgr
hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)
adjust = random.uniform(0.8, 1.2)
h = h * adjust
h = np.clip(h, 0, 255).astype(hsv.dtype)
hsv = cv2.merge((h, s, v))
bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
return bgr
def random_saturation(self, bgr):
if random.random() < 0.5:
return bgr
hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)
adjust = random.uniform(0.5, 1.5)
s = s * adjust
s = np.clip(s, 0, 255).astype(hsv.dtype)
hsv = cv2.merge((h, s, v))
bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
return bgr
def random_shift(self, img, boxes, labels):
if random.random() < 0.5:
return img, boxes, labels
center = (boxes[:, 2:] + boxes[:, :2]) / 2.0#框的中点
h, w, c = img.shape
img_out = np.zeros((h, w, c), dtype=img.dtype)
mean_bgr = self.mean[::-1]
img_out[:, :] = mean_bgr
dx = random.uniform(-w*0.2, w*0.2)
dy = random.uniform(-h*0.2, h*0.2)
dx, dy = int(dx), int(dy)
if dx >= 0 and dy >= 0:
img_out[dy:, dx:] = img[:h-dy, :w-dx]
elif dx >= 0 and dy < 0:
img_out[:h+dy, dx:] = img[-dy:, :w-dx]
elif dx < 0 and dy >= 0:
img_out[dy:, :w+dx] = img[:h-dy, -dx:]
elif dx < 0 and dy < 0:
img_out[:h+dy, :w+dx] = img[-dy:, -dx:]
center = center + torch.FloatTensor([[dx, dy]]).expand_as(center) # [n, 2]
mask_x = (center[:, 0] >= 0) & (center[:, 0] < w) # [n,]
mask_y = (center[:, 1] >= 0) & (center[:, 1] < h) # [n,]
mask = (mask_x & mask_y).view(-1, 1) # [n, 1], mask for the boxes within the image after shift.
boxes_out = boxes[mask.expand_as(boxes)].view(-1, 4) # [m, 4]
if len(boxes_out) == 0:
return img, boxes, labels
shift = torch.FloatTensor([[dx, dy, dx, dy]]).expand_as(boxes_out) # [m, 4]
boxes_out = boxes_out + shift
boxes_out[:, 0] = boxes_out[:, 0].clamp_(min=0, max=w)
boxes_out[:, 2] = boxes_out[:, 2].clamp_(min=0, max=w)
boxes_out[:, 1] = boxes_out[:, 1].clamp_(min=0, max=h)
boxes_out[:, 3] = boxes_out[:, 3].clamp_(min=0, max=h)
labels_out = labels[mask.view(-1)]
return img_out, boxes_out, labels_out
def random_crop(self, img, boxes, labels):
if random.random() < 0.5:
return img, boxes, labels
center = (boxes[:, 2:] + boxes[:, :2]) / 2.0
h_orig, w_orig, _ = img.shape
h = random.uniform(0.6 * h_orig, h_orig)
w = random.uniform(0.6 * w_orig, w_orig)
y = random.uniform(0, h_orig - h)
x = random.uniform(0, w_orig - w)
h, w, x, y = int(h), int(w), int(x), int(y)
center = center - torch.FloatTensor([[x, y]]).expand_as(center) # [n, 2]
mask_x = (center[:, 0] >= 0) & (center[:, 0] < w) # [n,]
mask_y = (center[:, 1] >= 0) & (center[:, 1] < h) # [n,]
mask = (mask_x & mask_y).view(-1, 1) # [n, 1], mask for the boxes within the image after crop.
boxes_out = boxes[mask.expand_as(boxes)].view(-1, 4) # [m, 4]
if len(boxes_out) == 0:
return img, boxes, labels
shift = torch.FloatTensor([[x, y, x, y]]).expand_as(boxes_out) # [m, 4]
boxes_out = boxes_out - shift
boxes_out[:, 0] = boxes_out[:, 0].clamp_(min=0, max=w)
boxes_out[:, 2] = boxes_out[:, 2].clamp_(min=0, max=w)
boxes_out[:, 1] = boxes_out[:, 1].clamp_(min=0, max=h)
boxes_out[:, 3] = boxes_out[:, 3].clamp_(min=0, max=h)
labels_out = labels[mask.view(-1)]
img_out = img[y:y+h, x:x+w, :]
return img_out, boxes_out, labels_out