小目标的目标检测

最新推荐文章于 2023-10-02 20:50:47 发布

zisuina_2

最新推荐文章于 2023-10-02 20:50:47 发布

阅读量281

点赞数

分类专栏：深度学习

本文链接：https://blog.csdn.net/zisuina_2/article/details/118025652

版权

深度学习专栏收录该内容

7 篇文章 0 订阅

订阅专栏

crop

crop 时应尽可能保持原图的尺度（分辨率），而不是把整张原图直接缩放到网络输入尺寸：
比如原图为（3000，2000），网络输入为 300 × 300。如果直接把整张图 resize 到网络输入，目标物在宽度方向上会被缩小约 10 倍，小目标就更小了。如果改为在（3000，2000）的原图中随机裁剪出一块（300 × 300）的区域，就不需要缩小 10 倍；也可以随机裁剪（300 × 300）的 c 倍大小的区域，比如 3 倍，即（900，900）的 roi，再 resize 到（300 × 300），这样目标物只缩小了 3 倍，对小目标非常友好。


def get_affine_transform(center,
                         scale,
                         rot,
                         output_size,
                         shift=np.array([0, 0], dtype=np.float32),
                         inv=0,
                         resize_keep_ratio=False):
    """Build the affine matrix mapping a source region onto the output canvas.

    Three point correspondences are assembled (region centre, a point half
    the source height away along the rotated "up" direction, and a third
    point supplied by ``get_3rd_point_ver2``) and handed to
    ``cv2.getAffineTransform``.

    Args:
        center: (x, y) of the source region centre.
        scale: source region extent as (width, height); a bare number is
            expanded to the same value on both axes.
        rot: rotation in degrees (converted to radians before use).
        output_size: (width, height) of the destination canvas.
        shift: per-axis offset, multiplied by ``scale`` and added to
            ``center``.
        inv: if truthy, return the destination -> source transform instead.
        resize_keep_ratio: if truthy, the destination control points are
            derived from the source extent so the aspect ratio is kept
            instead of stretching to ``output_size``.

    Returns:
        Whatever ``cv2.getAffineTransform`` returns for the point triples.
    """
    # A bare number means the same extent along both axes.
    if not isinstance(scale, (np.ndarray, list)):
        scale = np.array([scale, scale], dtype=np.float32)

    src_w, src_h = scale[0], scale[1]
    dst_w, dst_h = output_size[0], output_size[1]

    # Direction from the centre to the second control point, in source space.
    src_dir = get_dir([0, src_h * -0.5], np.pi * rot / 180)

    if resize_keep_ratio:
        # Source relatively narrower than the canvas: the full canvas height
        # is used. Otherwise use the height the source has once its width is
        # scaled to the canvas width.
        # NOTE(review): the original author was unsure whether this formula
        # fully solves the keep-ratio resize problem.
        if src_w / src_h < dst_w / dst_h:
            dst_dir_h = dst_h
        else:
            dst_dir_h = dst_w / src_w * src_h
    else:
        dst_dir_h = dst_h
    dst_dir = np.array([0, dst_dir_h * -0.5], np.float32)

    offset = scale * shift
    src = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + offset
    src[1, :] = center + src_dir + offset

    dst_center = np.array([dst_w * 0.5, dst_h * 0.5], np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    dst[0, :] = dst_center
    dst[1, :] = dst_center + dst_dir

    # The helper receives its extent arguments in (height, width) order.
    src[2:, :] = get_3rd_point_ver2(src[0, :], src[1, :], scale[1], scale[0])
    dst_extent = (scale[1], scale[0]) if resize_keep_ratio else (dst_h, dst_w)
    dst[2:, :] = get_3rd_point_ver2(dst[0, :], dst[1, :], *dst_extent)

    if inv:
        src, dst = dst, src
    return cv2.getAffineTransform(np.float32(src), np.float32(dst))


def affine_transform(pt, t):
    """Apply the affine matrix ``t`` to the 2-D point ``pt``.

    The point is lifted to homogeneous coordinates ``(x, y, 1)``, multiplied
    by ``t``, and the first two components of the product are returned.
    """
    x, y = pt[0], pt[1]
    homogeneous = np.array([x, y, 1.], dtype=np.float32)
    mapped = np.dot(t, homogeneous)
    return mapped[:2]


def _random_crop_ioa(img,
                     s,
                     c,
                     input_w,
                     input_h,
                     origin_bboxes,
                     random_stretch=False,
                     resize_keep_ratio=True,
                     ioa_thres=0.85):
    """Sample a random crop in which every box stays mostly visible.

    A crop is a centre from ``_random_center`` plus a size of
    ``[input_w, input_h] * k`` with ``k = clip(rand * 3, 1, 3)``; since
    ``rand`` is uniform on [0, 1), ``k`` is exactly 1 about a third of the
    time. For each candidate, every box is projected into the
    ``input_w`` x ``input_h`` output, clipped to the output bounds, and its
    IoA (clipped area / projected area) is computed. The first candidate
    whose smallest IoA exceeds ``ioa_thres`` is returned. After 31 * 30
    failed candidates an unconstrained random crop is returned instead.

    Args:
        img, s, c, random_stretch: forwarded unchanged to ``_random_center``.
        input_w, input_h: network input width and height.
        origin_bboxes: iterable of boxes indexable as (x1, y1, x2, y2).
            The boxes are not modified.
        resize_keep_ratio: forwarded to ``get_affine_transform``.
        ioa_thres: minimum IoA every box must exceed; ``None`` disables
            the constraint.

    Returns:
        ``(ss, cc)``: the crop size array and the crop centre.
    """
    max_attempts = 31 * 30  # same attempt budget as the original nested loops
    min_ioa = float('-inf') if ioa_thres is None else ioa_thres

    def sample_crop():
        # The size returned by _random_center is discarded: the crop size is
        # always a random multiple (1x-3x) of the network input size.
        _, center = _random_center(img, s, c, random_stretch)
        size = np.array([input_w, input_h]) * np.clip(np.random.rand() * 3, 1, 3)
        return size, center

    # With no boxes there is nothing to constrain the crop. Return a random
    # crop right away instead of exhausting the whole attempt budget first.
    if len(origin_bboxes) == 0:
        return sample_crop()

    for _ in range(max_attempts):
        ss, cc = sample_crop()
        trans_input = get_affine_transform(
            cc, ss, 0, [input_w, input_h], resize_keep_ratio=resize_keep_ratio)

        ioas = []
        for bbox in origin_bboxes:
            # Project both corners into output coordinates without touching
            # the caller's boxes.
            x1, y1 = affine_transform(bbox[:2], trans_input)
            x2, y2 = affine_transform(bbox[2:4], trans_input)
            ori_area = (y2 - y1) * (x2 - x1)
            if ori_area == 0:
                # A zero-area box would give 0/0 = NaN, and min() over a list
                # containing NaN depends on element order. Such a box cannot
                # constrain the crop, so it is skipped.
                continue
            # Clip to the output image to measure the visible part.
            cut_x1, cut_x2 = np.clip([x1, x2], 0, input_w - 1)
            cut_y1, cut_y2 = np.clip([y1, y2], 0, input_h - 1)
            cut_area = (cut_y2 - cut_y1) * (cut_x2 - cut_x1)
            ioas.append(cut_area / ori_area)

        if ioas and min(ioas) > min_ioa:
            return ss, cc

    # No candidate satisfied the constraint: fall back to a random crop.
    return sample_crop()


def resize_and_ioa_random_crop(meta, shape, with_mask, with_keypoints, random_stretch=False, resize_keep_ratio=True):
    """Randomly crop/resize a sample to ``shape`` and update its annotations.

    A crop is chosen by ``_random_crop_ioa``, the image is warped with the
    resulting affine matrix, and each box is projected and clipped to the
    output. Boxes are then handled as follows:

    * clipped height or width in (0, 6]: dropped and painted black;
    * otherwise IoA (clipped area / projected area) > 0.5: kept;
    * otherwise, if any part is still visible: dropped and painted black;
    * fully outside the crop: dropped.

    The annotations stored in ``meta`` are left untouched; all work is done
    on copies.

    Args:
        meta: dict with ``'img'``, ``'gt_bboxes'``, ``'gt_labels'`` and,
            when requested, ``'gt_masks'`` / ``'gt_keypoints'``.
        shape: ``(input_w, input_h)`` of the output image.
        with_mask: also warp every mask in ``meta['gt_masks']``.
        with_keypoints: also transform ``meta['gt_keypoints']``; each entry
            is reshaped to (17, 3), so 17 keypoints of 3 values are required.
        random_stretch: forwarded to ``_random_crop_ioa``.
        resize_keep_ratio: forwarded to ``_random_crop_ioa`` and
            ``get_affine_transform``.

    Returns:
        dict with ``'img'``, ``'gt_bboxes'``, ``'gt_labels'`` and optionally
        ``'gt_masks'`` / ``'gt_keypoints'``.
    """
    import copy  # local import: this block did not previously depend on copy

    min_side = 6        # boxes at most this many pixels high/wide are erased
    keep_ioa = 0.5      # minimum visible fraction for a box to be kept

    img = meta['img']
    labels = meta['gt_labels']
    # Copy the boxes: the loop below overwrites rows in place, which would
    # otherwise corrupt the caller's annotations if they are reused.
    bboxes = np.array(meta['gt_bboxes'])

    if with_mask:
        masks = meta['gt_masks']
    if with_keypoints:
        # Copied for the same reason as the boxes: they are edited in place.
        keypoints = copy.deepcopy(meta['gt_keypoints'])

    img_h, img_w = img.shape[0], img.shape[1]
    c = np.array([img_w / 2., img_h / 2.], dtype=np.float32)
    s = np.array([img_w, img_h], dtype=np.float32)
    input_w, input_h = shape
    ss, cc = _random_crop_ioa(img, s, c, input_w, input_h, bboxes, random_stretch, resize_keep_ratio)

    trans_affine = get_affine_transform(
        cc, ss, 0, [input_w, input_h], resize_keep_ratio=resize_keep_ratio)
    trans_img = cv2.warpAffine(img, trans_affine, (input_w, input_h), flags=cv2.INTER_LINEAR)

    def black_out(box):
        # Erase the (clipped) box region so a dropped object is not visible.
        trans_img[int(box[1]):int(box[3]), int(box[0]):int(box[2])] = 0

    new_bbox = []
    new_labels = []
    # TODO: speed up by transforming all boxes in one vectorised step.
    for i, bbox in enumerate(bboxes):
        bbox[:2] = affine_transform(bbox[:2], trans_affine)
        bbox[2:] = affine_transform(bbox[2:], trans_affine)
        ori_area = (bbox[3] - bbox[1]) * (bbox[2] - bbox[0])
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, input_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, input_h - 1)
        # Height and width of the part that lies inside the output image.
        cut_h, cut_w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        # A zero-area box has no visible fraction; avoid the 0/0 division.
        ioa = (cut_h * cut_w) / ori_area if ori_area != 0 else 0.0

        if 0 < cut_h <= min_side or 0 < cut_w <= min_side:
            # Object too small after cropping: erase it.
            black_out(bbox)
        elif ioa > keep_ioa:
            new_bbox.append(bbox)
            new_labels.append(labels[i])
        elif cut_h > 0 and cut_w > 0:
            # Too little of the object remains visible: erase it.
            black_out(bbox)

    trans_meta = dict(img=trans_img,
                      gt_bboxes=np.array(new_bbox),
                      gt_labels=np.array(new_labels))

    # NOTE(review): masks and keypoints are not filtered to match the boxes
    # kept above, so they may no longer line up one-to-one with gt_bboxes.
    # Confirm against the consumer of this dict.
    if with_mask:
        trans_masks = []
        for mask in masks:
            trans_mask = cv2.warpAffine(mask, trans_affine, (input_w, input_h), flags=cv2.INTER_LINEAR)
            trans_masks.append(trans_mask)
        trans_meta['gt_masks'] = trans_masks
    if with_keypoints:
        for keypoint in keypoints:
            keypoint.shape = 17, 3
            for j in range(17):
                # Only keypoints whose third value is positive are moved.
                if keypoint[j, 2] > 0:
                    keypoint[j, :2] = affine_transform(keypoint[j, :2], trans_affine)
            # Every row is clipped to the output bounds, moved or not.
            keypoint[:, 0] = np.clip(keypoint[:, 0], 0, input_w - 1)
            keypoint[:, 1] = np.clip(keypoint[:, 1], 0, input_h - 1)
            keypoint.shape = 51, 1
        trans_meta['gt_keypoints'] = keypoints

    return trans_meta