NormalizeBox 和 NormalizeImage 很像，都是把数值缩放到某个区间：
NormalizeBox 就是把 gt_bbox 的坐标缩放到 [0,1]。
class NormalizeBox(BaseOperator):
    """Divide ground-truth box and keypoint coordinates by the image size.

    x values are divided by the image width and y values by the image
    height, so pixel coordinates that lie inside the image end up in [0, 1].
    """

    def __init__(self):
        super(NormalizeBox, self).__init__()

    def apply(self, sample, context):
        """Rescale ``sample['gt_bbox']`` (and ``sample['gt_keypoint']``) in place.

        Args:
            sample: Mapping holding ``'image'`` (its shape is unpacked as
                height, width, channels), ``'gt_bbox'`` (one row per box;
                columns 0-3 are treated as x, y, x, y) and, optionally,
                ``'gt_keypoint'`` (columns alternate x1, y1, x2, y2, ...).
            context: Not used by this operator.

        Returns:
            The same ``sample`` object; the arrays above are modified in place.
        """
        image = sample['image']
        boxes = sample['gt_bbox']
        img_h, img_w, _ = image.shape

        # ------------ rescale the bounding boxes ------------
        # Iterating yields one row per box; writing through the row updates
        # the array held by the sample (assumes a NumPy array -- the code
        # relies on ``.shape`` and row views).
        for box in boxes:
            box[0] = box[0] / img_w
            box[1] = box[1] / img_h
            box[2] = box[2] / img_w
            box[3] = box[3] / img_h
        sample['gt_bbox'] = boxes

        # ------------ rescale the keypoints, if present ------------
        if 'gt_keypoint' in sample:
            keypoints = sample['gt_keypoint']
            # Keypoints are stored as x1 y1 x2 y2 ...: even columns hold x
            # (divide by width), odd columns hold y (divide by height).
            keypoints[:, 0::2] = keypoints[:, 0::2] / img_w
            keypoints[:, 1::2] = keypoints[:, 1::2] / img_h
            sample['gt_keypoint'] = keypoints

        return sample
class BboxXYXY2XYWH(BaseOperator):
    """Rewrite ``gt_bbox`` from corner form to centre-plus-size form.

    Each row goes from (x1, y1, x2, y2) to
    (center_x, center_y, width, height).
    """

    def __init__(self):
        super(BboxXYXY2XYWH, self).__init__()

    def apply(self, sample, context=None):
        """Convert ``sample['gt_bbox']`` in place and return ``sample``.

        Args:
            sample: Mapping that must contain ``'gt_bbox'``; columns 0-3 of
                that array are rewritten.
            context: Not used by this operator.

        Returns:
            The same ``sample`` object with the converted boxes.
        """
        assert 'gt_bbox' in sample
        boxes = sample['gt_bbox']

        # Columns 2-3: second corner minus first corner -> width and height.
        boxes[:, 2:4] = boxes[:, 2:4] - boxes[:, :2]

        # Columns 0-1: shift the top-left corner by half the size to reach
        # the box centre. Must run after the width/height step above.
        half_extent = boxes[:, 2:4] / 2.
        boxes[:, :2] = boxes[:, :2] + half_extent

        sample['gt_bbox'] = boxes
        return sample
传入该 apply 之前的可视化：
# Debug snippet: draw one ground-truth box on the image and save it to disk.
# Relies on `sample` and `cv2` being available in the surrounding scope.
# NOTE(review): `im` is assigned but never used in this snippet.
im = sample['image']
# Multiply by per-channel factors, add per-channel offsets, then scale by 255.
# Presumably this undoes the std/mean normalisation applied earlier and
# assumes an HWC image with 3 channels -- confirm against NormalizeImage.
img = (sample['image'] * [0.229, 0.224,0.225] + [0.485, 0.456, 0.406]) * 255
# Truncate to integers, then narrow to uint8 (no clipping is performed).
img = img.astype(int).astype("uint8")
h, w, _ = img.shape
# Scale box coordinates by the image size; assumes gt_bbox holds normalised
# (x1, y1, x2, y2) rows at this point -- confirm against the pipeline order.
gt_bboxes = sample['gt_bbox'] * [w, h, w, h]
gt_bboxes = gt_bboxes.astype(int)
# Picks the box at index 1, so at least two boxes must be present.
x1, y1, x2, y2 = gt_bboxes[1]
xx = cv2.rectangle(img, (x1, y1), (x2, y2), 255, thickness=2, lineType=8)
cv2.imwrite("xxx.png", xx)
经过该 apply 之后的可视化：
# Debug snippet: draw one ground-truth box, stored as (center_x, center_y,
# width, height), on the image and save it to disk.
# Relies on `sample` and `cv2` being available in the surrounding scope.
# NOTE(review): `im` is assigned but never used in this snippet.
im = sample['image']
# Multiply by per-channel factors, add per-channel offsets, then scale by 255.
# Presumably this undoes the std/mean normalisation applied earlier and
# assumes an HWC image with 3 channels -- confirm against NormalizeImage.
img = (sample['image'] * [0.229, 0.224,0.225] + [0.485, 0.456, 0.406]) * 255
# Truncate to integers, then narrow to uint8 (no clipping is performed).
img = img.astype(int).astype("uint8")
h, w, _ = img.shape
# Scale box values by the image size; assumes gt_bbox holds normalised rows.
gt_bboxes = sample['gt_bbox'] * [w, h, w, h]
gt_bboxes = gt_bboxes.astype(int)
# Picks the box at index 3, so at least four boxes must be present.
# This rebinds `w` and `h`: from here on they are the box size, not the
# image size read from img.shape above.
x_c, y_c, w, h = gt_bboxes[3]
# Recover the corners from the centre using integer half-sizes.
x1, y1, x2, y2 = x_c-w//2, y_c-h//2, x_c+w//2, y_c+h//2
xx = cv2.rectangle(img, (x1, y1), (x2, y2), 255, thickness=2, lineType=8)
cv2.imwrite("xxx.png", xx)
注意：此处的 sample['h'] 和 sample['w'] 都是原图的尺寸。
Permute 的话就很简单了,就是 HWC 转化为 CHW
class Permute(BaseOperator):
    """Reorder the image axes so the channel axis comes first: (C, H, W)."""

    def __init__(self):
        """Create the operator; it takes no configuration."""
        super(Permute, self).__init__()

    def apply(self, sample, context=None):
        """Transpose ``sample['image']`` with axis order (2, 0, 1).

        Args:
            sample: Mapping holding ``'image'``, a 3-axis array
                (height, width, channel order on input, per the class notes).
            context: Not used by this operator.

        Returns:
            The same ``sample`` object with the transposed image stored back.
        """
        sample['image'] = sample['image'].transpose((2, 0, 1))
        return sample