crop
crop 的尺寸应尽可能保持原图的分辨率:
比如原图尺寸为 (3000, 2000),网络输入为 300 × 300,若直接把整图 resize 到网络输入,目标物必定缩小约 10 倍,小目标就更小了。如果在 (3000, 2000) 的原图中随机裁剪出 300 × 300 的区域,就不需要缩小 10 倍;或者随机裁剪 300 × 300 的 c 倍大小的区域,比如 3 倍,即 (900, 900) 的 ROI,再 resize 到 300 × 300,那么目标物只缩小了 3 倍,对小目标非常友好。
def get_affine_transform(center,
                         scale,
                         rot,
                         output_size,
                         shift=np.array([0, 0], dtype=np.float32),
                         inv=0,
                         resize_keep_ratio=False):
    """Build the affine matrix mapping a source region onto the output canvas.

    Three point pairs (region center, a point offset from it along the
    vertical direction, and a third point supplied by
    ``get_3rd_point_ver2``) are handed to ``cv2.getAffineTransform``.

    Args:
        center: (x, y) center of the source region.
        scale: source region size as ``(w, h)``; a scalar is expanded to
            ``(scale, scale)``.
        rot: rotation angle in degrees.
        output_size: ``(w, h)`` of the destination image.
        shift: offset of the source center, expressed as a fraction of
            ``scale``.
        inv: when truthy, return the destination -> source transform.
        resize_keep_ratio: when truthy, size the destination direction
            vector so the source aspect ratio is preserved.

    Returns:
        The matrix produced by ``cv2.getAffineTransform``.
    """
    if not isinstance(scale, (np.ndarray, list)):
        scale = np.array([scale, scale], dtype=np.float32)

    src_w = scale[0]
    src_h = scale[1]
    dst_w = output_size[0]
    dst_h = output_size[1]
    rot_rad = np.pi * rot / 180

    # The source direction vector is the same in both modes.
    # NOTE(review): get_dir is defined elsewhere; presumably it rotates the
    # vector by rot_rad -- confirm.
    src_dir = get_dir([0, src_h * -0.5], rot_rad)

    if resize_keep_ratio:
        # Fit the source box inside the output: use the full output height
        # when the source is relatively taller, otherwise derive the height
        # from the width ratio.
        # NOTE: the original author left an open question here on whether
        # this finally solves the keep-ratio resize problem.
        if src_w / src_h < dst_w / dst_h:
            fitted_h = dst_h
        else:
            fitted_h = dst_w / src_w * src_h
        dst_dir = np.array([0, fitted_h * -0.5], np.float32)
    else:
        dst_dir = np.array([0, dst_h * -0.5], np.float32)

    offset = scale * shift
    dst_center = np.array([dst_w * 0.5, dst_h * 0.5], np.float32)

    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + offset
    src[1, :] = center + src_dir + offset
    dst[0, :] = dst_center
    dst[1, :] = dst_center + dst_dir

    # The third source point always uses the source (h, w); the third
    # destination point reuses the source (h, w) in keep-ratio mode and the
    # output (h, w) otherwise.
    src[2:, :] = get_3rd_point_ver2(src[0, :], src[1, :], scale[1], scale[0])
    if resize_keep_ratio:
        dst[2:, :] = get_3rd_point_ver2(dst[0, :], dst[1, :], scale[1], scale[0])
    else:
        dst[2:, :] = get_3rd_point_ver2(dst[0, :], dst[1, :], dst_h, dst_w)

    src_pts = np.float32(src)
    dst_pts = np.float32(dst)
    if inv:
        return cv2.getAffineTransform(dst_pts, src_pts)
    return cv2.getAffineTransform(src_pts, dst_pts)
def affine_transform(pt, t):
    """Apply affine matrix ``t`` to one point or to a batch of points.

    Args:
        pt: a single point whose first two entries are ``(x, y)``, or an
            array of points with shape ``(..., 2)``.
        t: affine matrix whose rows are dotted with the homogeneous point
            ``(x, y, 1)`` (e.g. the 2x3 output of
            ``cv2.getAffineTransform``).

    Returns:
        For a single point, the first two entries of ``t . (x, y, 1)``.
        For a batch, an array of shape ``(..., 2)`` holding the transformed
        points.
    """
    if np.ndim(pt) <= 1:
        # Single point: lift to homogeneous coordinates and multiply.
        homogeneous = np.array([pt[0], pt[1], 1.], dtype=np.float32)
        return np.dot(t, homogeneous)[:2]

    # Batch of points: append a column of ones and multiply all at once.
    pts = np.asarray(pt, dtype=np.float32)[..., :2]
    ones = np.ones(pts.shape[:-1] + (1,), dtype=np.float32)
    homogeneous = np.concatenate([pts, ones], axis=-1)
    return np.dot(homogeneous, np.asarray(t).T)[..., :2]
def _random_crop_ioa(img,
                     s,
                     c,
                     input_w,
                     input_h,
                     origin_bboxes,
                     random_stretch=False,
                     resize_keep_ratio=True,
                     ioa_thres=0.85):
    """Search for a random crop that keeps every box mostly inside it.

    Each attempt draws a crop center from ``_random_center`` and a crop size
    of ``[input_w, input_h]`` times a random factor in ``[1, 3]``. A crop is
    accepted when, after mapping the boxes into the
    ``input_w`` x ``input_h`` output, the smallest ratio
    (clipped box area / unclipped box area) is above ``ioa_thres``.

    Args:
        img, s, c, random_stretch: forwarded unchanged to ``_random_center``
            (defined elsewhere in this file).
        input_w: network input width.
        input_h: network input height.
        origin_bboxes: iterable of ``[x1, y1, x2, y2]`` boxes in source
            image coordinates. It is not modified.
        resize_keep_ratio: forwarded to ``get_affine_transform``.
        ioa_thres: minimum acceptable ratio; ``None`` accepts any crop.

    Returns:
        ``(ss, cc)``: the crop size ``[w, h]`` and the crop center.
    """
    # Same search budget as the original nested loops: 31 rounds x 30 tries.
    max_attempts = 31 * 30
    min_ioa = float('-inf') if ioa_thres is None else ioa_thres

    def sample_crop():
        # Only the center from _random_center is used; its size is replaced
        # by a multiple of the network input size. rand() * 3 clipped to
        # [1, 3] means roughly a third of the draws use exactly 1x.
        _, center = _random_center(img, s, c, random_stretch)
        factor = np.clip(np.random.rand() * 3, 1, 3)
        return np.array([input_w, input_h]) * factor, center

    for _ in range(max_attempts):
        ss, cc = sample_crop()
        trans_input = get_affine_transform(
            cc, ss, 0, [input_w, input_h], resize_keep_ratio=resize_keep_ratio)

        ioas = []
        for bbox in origin_bboxes:
            # Map both corners into output coordinates without touching the
            # caller's boxes, so no per-attempt deep copy is needed.
            x1, y1 = affine_transform(bbox[:2], trans_input)
            x2, y2 = affine_transform(bbox[2:], trans_input)
            full_area = (y2 - y1) * (x2 - x1)
            if full_area == 0:
                # A zero-area box has no area to preserve; dividing by it
                # would yield NaN and make min() order-dependent.
                continue
            # Clip to the output canvas to get the part that survives.
            cx1, cx2 = np.clip([x1, x2], 0, input_w - 1)
            cy1, cy2 = np.clip([y1, y2], 0, input_h - 1)
            ioas.append((cy2 - cy1) * (cx2 - cx1) / full_area)

        # With nothing to check (no boxes, or only degenerate ones) every
        # crop is equally valid, so take this one instead of burning the
        # whole budget before falling back to a random crop anyway.
        if not ioas or min(ioas) > min_ioa:
            return ss, cc

    # No acceptable crop found: fall back to an unconstrained random crop.
    return sample_crop()
def resize_and_ioa_random_crop(meta, shape, with_mask, with_keypoints, random_stretch=False, resize_keep_ratio=True):
    """Randomly crop and resize a sample, remapping its annotations.

    A crop is chosen with ``_random_crop_ioa``, the image is warped to
    ``shape`` and every box is mapped into the output and clipped to it.
    Boxes that end up tiny (a side of at most 6 px) or keep no more than
    half of their area are dropped and their visible region is painted
    black; the remaining boxes and their labels are returned.

    Args:
        meta: dict with ``'img'``, ``'gt_bboxes'`` and ``'gt_labels'``, plus
            ``'gt_masks'`` when ``with_mask`` and ``'gt_keypoints'`` when
            ``with_keypoints``. The annotations in ``meta`` are left
            unmodified.
        shape: ``(input_w, input_h)`` of the output image.
        with_mask: warp ``meta['gt_masks']`` with the same transform.
        with_keypoints: transform ``meta['gt_keypoints']``; each entry is
            reshaped to ``(17, 3)`` for processing and back to ``(51, 1)``.
        random_stretch: forwarded to ``_random_crop_ioa``.
        resize_keep_ratio: forwarded to ``_random_crop_ioa`` and
            ``get_affine_transform``.

    Returns:
        dict with ``img``, ``gt_bboxes``, ``gt_labels`` and, when requested,
        ``gt_masks`` and ``gt_keypoints``.
    """
    # Local import: the original block did not reference ``copy``.
    import copy

    small_side = 6   # boxes with a side this small (in px) are blacked out
    keep_ioa = 0.5   # minimum surviving-area ratio for a box to be kept

    img = meta['img']
    labels = meta['gt_labels']
    # Work on a copy so the caller's boxes are not rewritten in place.
    bboxes = np.array(meta['gt_bboxes'])

    img_h, img_w = img.shape[0], img.shape[1]
    c = np.array([img_w / 2., img_h / 2.], dtype=np.float32)
    s = np.array([img_w, img_h], dtype=np.float32)
    input_w, input_h = shape

    ss, cc = _random_crop_ioa(img, s, c, input_w, input_h, bboxes, random_stretch, resize_keep_ratio)
    trans_affine = get_affine_transform(
        cc, ss, 0, [input_w, input_h], resize_keep_ratio=resize_keep_ratio)
    trans_img = cv2.warpAffine(img, trans_affine, (input_w, input_h), flags=cv2.INTER_LINEAR)

    new_bbox = []
    new_labels = []
    # TODO: speed up the per-box affine transform.
    for bbox, label in zip(bboxes, labels):
        bbox[:2] = affine_transform(bbox[:2], trans_affine)
        bbox[2:] = affine_transform(bbox[2:], trans_affine)
        full_area = (bbox[3] - bbox[1]) * (bbox[2] - bbox[0])
        # Clip to the output canvas so coordinates never go out of bounds.
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, input_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, input_h - 1)
        cut_h, cut_w = bbox[3] - bbox[1], bbox[2] - bbox[0]  # size inside the output
        # A zero-area box yields ratio 0 instead of a 0/0 NaN; the branch
        # taken below is the same either way.
        ioa = (cut_h * cut_w) / full_area if full_area != 0 else 0.0

        rows = slice(int(bbox[1]), int(bbox[3]))
        cols = slice(int(bbox[0]), int(bbox[2]))
        if 0 < cut_h <= small_side or 0 < cut_w <= small_side:
            # Object too small after the crop: paint it black.
            trans_img[rows, cols] = 0
        elif ioa > keep_ioa:
            new_bbox.append(bbox)
            new_labels.append(label)
        elif cut_h > 0 and cut_w > 0:
            # Too little of the object survives: paint the remainder black.
            trans_img[rows, cols] = 0

    trans_meta = dict(img=trans_img,
                      gt_bboxes=np.array(new_bbox),
                      gt_labels=np.array(new_labels))

    # NOTE(review): masks and keypoints below are returned for every input
    # object, including those whose box was dropped above, so they may no
    # longer line up index-wise with gt_bboxes -- confirm what callers expect.
    if with_mask:
        trans_meta['gt_masks'] = [
            cv2.warpAffine(mask, trans_affine, (input_w, input_h), flags=cv2.INTER_LINEAR)
            for mask in meta['gt_masks']
        ]

    if with_keypoints:
        # Deep copy so the caller's keypoints are not rewritten in place.
        keypoints = copy.deepcopy(meta['gt_keypoints'])
        for keypoint in keypoints:
            keypoint.shape = 17, 3
            for j in range(17):
                # Only keypoints whose third column is positive are moved.
                if keypoint[j, 2] > 0:
                    keypoint[j, :2] = affine_transform(keypoint[j, :2], trans_affine)
            keypoint[:, 0] = np.clip(keypoint[:, 0], 0, input_w - 1)
            keypoint[:, 1] = np.clip(keypoint[:, 1], 0, input_h - 1)
            keypoint.shape = 51, 1
        trans_meta['gt_keypoints'] = keypoints

    return trans_meta
FPN
加入多尺寸