[拆轮子] PaddleDetection 中的预处理 PadBatch

该API主要就是将 img 的下侧和右侧填充0,将同一个 batch 的照片填充为相同大小的图片

PadBatch 代码:

@register_op
class PadBatch(BaseOperator):
    """
    Pad every image of a batch to one common height and width.

    Zeros are appended on the bottom and right sides so that all images share
    the largest height/width found in the batch.
    The layout of each image should be 'CHW'.

    Args:
        pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure
            height and width is divisible by `pad_to_stride`.
    """

    def __init__(self, pad_to_stride=0):
        super(PadBatch, self).__init__()
        self.pad_to_stride = pad_to_stride

    def __call__(self, samples, context=None):
        """
        Args:
            samples (list): a batch of samples, each is a dict with an
                'image' array and optionally 'semantic' / 'gt_segm' arrays.
                A nested list (multi scale input) is also accepted.
            context: not used by this operator.
        Returns:
            The `samples` object that was passed in; the sample dicts are
            modified in place.
        """
        coarsest_stride = self.pad_to_stride

        # Multi scale input is a nested list; only its first inner list is
        # padded here.
        is_nested = (isinstance(samples, typing.Sequence) and
                     len(samples) > 0 and
                     isinstance(samples[0], typing.Sequence))
        inner_samples = samples[0] if is_nested else samples

        # Nothing to pad: without this guard the max-reduction below fails
        # with a ValueError on a zero-size array.
        if len(inner_samples) == 0:
            return samples

        # Element-wise maximum over all image shapes -> (C, H, W) target.
        max_shape = np.array(
            [data['image'].shape for data in inner_samples]).max(axis=0)
        if coarsest_stride > 0:
            # Round height (axis 1) and width (axis 2) up to the next
            # multiple of the stride.
            for axis in (1, 2):
                max_shape[axis] = int(
                    np.ceil(max_shape[axis] / coarsest_stride) *
                    coarsest_stride)
        pad_h, pad_w = max_shape[1], max_shape[2]

        for data in inner_samples:
            im = data['image']
            im_c, im_h, im_w = im.shape[:]
            # The original pixels stay in the top-left corner; the padded
            # image is always float32.
            padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
            padding_im[:, :im_h, :im_w] = im
            data['image'] = padding_im

            if 'semantic' in data and data['semantic'] is not None:
                # The semantic map is padded into a single-channel canvas.
                padding_sem = np.zeros((1, pad_h, pad_w), dtype=np.float32)
                padding_sem[:, :im_h, :im_w] = data['semantic']
                data['semantic'] = padding_sem

            if 'gt_segm' in data and data['gt_segm'] is not None:
                gt_segm = data['gt_segm']
                # Keeps the first dimension of gt_segm; stored as uint8.
                padding_segm = np.zeros(
                    (gt_segm.shape[0], pad_h, pad_w), dtype=np.uint8)
                padding_segm[:, :im_h, :im_w] = gt_segm
                data['gt_segm'] = padding_segm

        return samples

传入的 sample 数据:

>>> samples[0].keys()
dict_keys(['im_id', 'h', 'w', 'is_crowd', 'gt_class', 'gt_bbox', 'curr_iter', 'image', 'im_shape', 'scale_factor', 'flipped'])

>>> samples[0]
{'curr_iter': 0,
 'flipped': True,
 'gt_bbox': array([[139.37924,  31.7056 , 437.1072 , 720.53754]], dtype=float32),
 'gt_class': array([[14]], dtype=int32),
 'h': 640.0,
 'w': 475.0,
 'im_id': array([270705]),
 'im_shape': array([819.2, 608. ], dtype=float32),
 'image': array([[[-1.5699117 , -1.5527871 , -1.5870365 , ...,  2.2489083 ,
 		  ......
          2.1171246 ,  2.1345537 ]]], dtype=float32),
 'is_crowd': array([[0]], dtype=int32),
 'scale_factor': array([1.28, 1.28], dtype=float32),
}

该 transform 的作用是:取同一个 batch 的所有图片,找出最大的高和宽,并将其向上取整为 pad_to_stride 的整数倍,然后在每张图片的下侧和右侧填充 0,使所有图片都达到这个尺寸。

该 transform 的作用和 PadMaskBatch的作用类似:
https://blog.csdn.net/HaoZiHuang/article/details/128421348
只不过后者会额外返回 mask,而 PadBatch 不返回 mask,样本中只保留原图的宽高信息(如 h、w、im_shape)

# Find the largest extent along every axis across the images of the batch
max_shape = np.array(
    [data['image'].shape for data in inner_samples]).max(axis=0)

# Round the height and width in max_shape up to a multiple of the stride
if coarsest_stride > 0:
    max_shape[1] = int(
        np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)    # np.ceil rounds up
    max_shape[2] = int(
        np.ceil(max_shape[2] / coarsest_stride) * coarsest_stride)
for data in inner_samples: # 迭代每一张图片的信息


	# --------- 将图片放到全部填充为0的 padding_im --------- 
    im = data['image']
    im_c, im_h, im_w = im.shape[:]
    padding_im = np.zeros(
        (im_c, max_shape[1], max_shape[2]), dtype=np.float32)
    padding_im[:, :im_h, :im_w] = im
    data['image'] = padding_im

	# 以下是对分割信息的处理
    if 'semantic' in data and data['semantic'] is not None:
        semantic = data['semantic']
        padding_sem = np.zeros(
            (1, max_shape[1], max_shape[2]), dtype=np.float32)
        padding_sem[:, :im_h, :im_w] = semantic
        data['semantic'] = padding_sem
    if 'gt_segm' in data and data['gt_segm'] is not None:
        gt_segm = data['gt_segm']
        padding_segm = np.zeros(
            (gt_segm.shape[0], max_shape[1], max_shape[2]),
            dtype=np.uint8)
        padding_segm[:, :im_h, :im_w] = gt_segm
        data['gt_segm'] = padding_segm


        return samples
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值