pytorch版本SSD代码分析(2)——数据增强

本代码是pytorch版本的ssd实现,来源amdegroot/ssd.pytorch

一、PhotometricDistort

class PhotometricDistort(object):
    """Chain of photometric (colour-only) augmentations.

    Brightness is applied first, then a contrast/saturation/hue pipeline
    in which the contrast step is randomly placed either at the start or
    at the end, and finally the lighting-noise (channel shuffle) step.
    Boxes and labels are passed through the individual transforms.
    """

    def __init__(self):
        # Six steps; __call__ uses either the first five or the last five,
        # so exactly one of the two RandomContrast entries is ever active.
        self.pd = [
            RandomContrast(),
            ConvertColor(transform='HSV'),
            RandomSaturation(),
            RandomHue(),
            ConvertColor(current='HSV', transform='BGR'),
            RandomContrast()
        ]
        self.rand_brightness = RandomBrightness()
        self.rand_light_noise = RandomLightingNoise()

    def __call__(self, image, boxes, labels):
        """Apply the distortion chain to a copy of `image`.

        Returns the (image, boxes, labels) triple produced by the final
        lighting-noise transform.
        """
        # Work on a copy so the caller's array is not modified in place.
        im, boxes, labels = self.rand_brightness(image.copy(), boxes, labels)
        contrast_first = random.randint(2)
        if contrast_first:
            # Contrast is applied before the HSV round trip.
            stages = self.pd[:-1]
        else:
            # Contrast is applied after the HSV round trip.
            stages = self.pd[1:]
        im, boxes, labels = Compose(stages)(im, boxes, labels)
        return self.rand_light_noise(im, boxes, labels)

RandomBrightness(随机改变亮度):

在原有图片像素上加一个实数(实数的范围在[-32,32])

其中:这里的random是numpy.random;random.randint(2)随机返回整数0或1(各占一半概率),random.uniform(x, y)随机生成一个位于[x, y)范围内的实数

class RandomBrightness(object):
    """Randomly shift every pixel of the image by one common offset.

    Args:
        delta: half-width of the offset range; must lie in [0, 255].
    """

    def __init__(self, delta=32):
        # delta defaults to 32 and has to stay within the 0-255 range.
        assert 0.0 <= delta
        assert 255.0 >= delta
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        """Add an offset drawn from uniform(-delta, delta) about half the time.

        The image is modified in place (so it has to support in-place
        addition of a float); boxes and labels are returned untouched.
        """
        apply_shift = random.randint(2)
        if not apply_shift:
            return image, boxes, labels
        offset = random.uniform(-self.delta, self.delta)
        image += offset
        return image, boxes, labels

RandomContrast(随机改变对比度):

在原图像素上乘一个系数(系数的范围在[0.5,1.5])

class RandomContrast(object):
    """Randomly scale every pixel of the image by one common factor.

    Args:
        lower: smallest scale factor; must be non-negative.
        upper: largest scale factor; must be >= lower.
    """

    def __init__(self, lower=0.5, upper=1.5):
        self.lower, self.upper = lower, upper
        assert self.upper >= self.lower, "contrast upper must be >= lower."
        assert self.lower >= 0, "contrast lower must be non-negative."

    def __call__(self, image, boxes=None, labels=None):
        """Multiply the image by a factor from uniform(lower, upper) about half the time.

        Expects a float image, which is scaled in place; boxes and labels
        are returned untouched.
        """
        apply_scale = random.randint(2)
        if not apply_scale:
            return image, boxes, labels
        factor = random.uniform(self.lower, self.upper)
        image *= factor
        return image, boxes, labels

ConvertColor(变换颜色空间):

变换颜色空间,若当前为BGR则变换到HSV,若当前为HSV变换到BGR

其中,cv2.cvtColor函数功能是变换空间

class ConvertColor(object):
    """Convert an image between the BGR and HSV colour spaces via cv2.cvtColor.

    Args:
        current: colour space the incoming image is in (default 'BGR').
        transform: colour space to convert to (default 'HSV').
    """

    def __init__(self, current='BGR', transform='HSV'):
        self.current = current      # source colour space
        self.transform = transform  # destination colour space

    def __call__(self, image, boxes=None, labels=None):
        """Return the converted image together with the untouched boxes and labels.

        Raises:
            NotImplementedError: for any pair other than BGR->HSV or HSV->BGR.
        """
        conversion = (self.current, self.transform)
        if conversion == ('BGR', 'HSV'):
            code = cv2.COLOR_BGR2HSV
        elif conversion == ('HSV', 'BGR'):
            code = cv2.COLOR_HSV2BGR
        else:
            raise NotImplementedError
        return cv2.cvtColor(image, code), boxes, labels

RandomSaturation(随机改变饱和度):

在HSV空间的S维度上乘一个系数(系数在范围[0.5,1.5]中随机得到一个实数)

class RandomSaturation(object):
    """Randomly scale channel 1 of the image by one common factor.

    In the PhotometricDistort pipeline this runs between the BGR->HSV and
    HSV->BGR conversions, so channel 1 is the saturation channel there.

    Args:
        lower: smallest scale factor; must be non-negative.
        upper: largest scale factor; must be >= lower.
    """

    def __init__(self, lower=0.5, upper=1.5):
        self.lower = lower
        self.upper = upper
        # The messages name this transform (they previously said "contrast",
        # which pointed at the wrong class when the check failed).
        assert self.upper >= self.lower, "saturation upper must be >= lower."
        assert self.lower >= 0, "saturation lower must be non-negative."

    def __call__(self, image, boxes=None, labels=None):
        """Multiply channel 1 by a factor from uniform(lower, upper) about half the time.

        The image is modified in place; boxes and labels are returned
        untouched. No clipping is applied to the scaled values.
        """
        if random.randint(2):
            image[:, :, 1] *= random.uniform(self.lower, self.upper)
        return image, boxes, labels

RandomHue(随机改变色调):

在HSV空间的H维度随机加一个实数(实数的范围[-18.0,18.0])

class RandomHue(object):
    """Randomly shift channel 0 of the image, wrapping around 360.

    In the PhotometricDistort pipeline this runs on the HSV image, so
    channel 0 is the hue channel there.

    Args:
        delta: half-width of the shift range; must lie in [0, 360].
    """

    def __init__(self, delta=18.0):
        assert 0.0 <= delta <= 360.0
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        """Add an offset from uniform(-delta, delta) to channel 0 about half the time.

        The image is modified in place; boxes and labels are returned
        untouched.
        """
        if not random.randint(2):
            return image, boxes, labels
        hue = image[:, :, 0]  # view: edits below write through to `image`
        hue += random.uniform(-self.delta, self.delta)
        # Wrap values that left the [0, 360] range back into it.
        hue[hue > 360.0] -= 360.0
        hue[hue < 0.0] += 360.0
        return image, boxes, labels

RandomLightingNoise(随机变换通道):

设置了6种变换方式(即三个通道的全部排列),随机选择一种,将BGR三个通道顺序改变

class RandomLightingNoise(object):
    """Randomly reorder the three colour channels of the image."""

    def __init__(self):
        # All six orderings of the channel indices 0, 1, 2.
        self.perms = ((0, 1, 2), (0, 2, 1),
                      (1, 0, 2), (1, 2, 0),
                      (2, 0, 1), (2, 1, 0))

    def __call__(self, image, boxes=None, labels=None):
        """Apply one randomly chosen channel permutation about half the time.

        Boxes and labels are returned untouched.
        """
        if not random.randint(2):
            return image, boxes, labels
        order = self.perms[random.randint(len(self.perms))]
        image = SwapChannels(order)(image)  # shuffle channels
        return image, boxes, labels

二、Expand(随机扩张图片)

以一半的概率执行:将原有图片的高和宽乘以一个在[1,4]内随机取得的ratio系数得到扩张后的画布,将原有图片放在画布中的随机位置(左上角坐标为(left, top)),其他位置像素值使用均值填充,相应的bbox也按(left, top)进行平移

class Expand(object):
    """Randomly place the image on a larger, mean-filled canvas.

    Args:
        mean: fill value for the canvas; it is assigned to every pixel, so
            it must broadcast against the channel axis.
    """

    def __init__(self, mean):
        self.mean = mean

    def __call__(self, image, boxes, labels):
        """Expand the image about half the time and shift the boxes to match.

        Returns the inputs unchanged when the transform is skipped;
        otherwise returns the new canvas, a shifted copy of `boxes`, and
        the original `labels`.
        """
        # Coin flip: a truthy draw means "leave the sample alone".
        if random.randint(2):
            return image, boxes, labels

        height, width, depth = image.shape
        ratio = random.uniform(1, 4)  # canvas is 1x-4x the original size
        # Top-left corner at which the original image is pasted.
        left = random.uniform(0, width * ratio - width)
        top = random.uniform(0, height * ratio - height)
        x0, y0 = int(left), int(top)

        canvas_shape = (int(height * ratio), int(width * ratio), depth)
        canvas = np.zeros(canvas_shape, dtype=image.dtype)
        canvas[:, :, :] = self.mean  # fill every pixel with the mean
        canvas[y0:int(top + height), x0:int(left + width)] = image

        # Translate both box corners by the paste offset.
        shifted = boxes.copy()
        offset = (x0, y0)
        shifted[:, :2] += offset
        shifted[:, 2:] += offset

        return canvas, shifted, labels

三、RandomSampleCrop(随机剪裁)

在图像上随机剪裁矩形区域,只保留中心点落在裁剪区域内的bbox(至少要有一个,否则重新采样),并将这些bbox裁剪、平移到剪裁区域的坐标系中

class RandomSampleCrop(object):
    """Randomly crop a patch and keep the boxes whose centres fall inside it.

    Each call repeatedly picks a sampling mode until one yields a result:
    either the sample is returned unchanged, or a patch is sampled under
    an IoU constraint against the ground-truth boxes.

    Returns from __call__: (image, boxes, labels), where boxes are
    expressed in the coordinate frame of the returned image.
    """

    def __init__(self):
        # Each entry is either None or a (min_iou, max_iou) pair in which
        # None means "unbounded on that side".
        self.sample_options = (
            # use the entire original input image
            None,
            # sample a patch with a minimum jaccard overlap of .1, .3, .7, .9
            (0.1, None),
            (0.3, None),
            (0.7, None),
            (0.9, None),
            # randomly sample a patch (no overlap constraint)
            (None, None),
        )

    def __call__(self, image, boxes=None, labels=None):
        height, width, _ = image.shape
        while True:
            # Pick a mode by index. numpy's random.choice needs a 1-D array
            # and cannot handle this mix of None and tuples.
            mode = self.sample_options[random.randint(len(self.sample_options))]
            if mode is None:
                return image, boxes, labels

            min_iou, max_iou = mode
            if min_iou is None:
                min_iou = float('-inf')
            if max_iou is None:
                max_iou = float('inf')

            # Up to 50 trials per mode; if all fail, a new mode is drawn.
            for _ in range(50):
                # Patch size: between 30% and 100% of each image side.
                w = random.uniform(0.3 * width, width)
                h = random.uniform(0.3 * height, height)

                # Aspect ratio must stay within [0.5, 2].
                if h / w < 0.5 or h / w > 2:
                    continue

                # Top-left corner of the patch. Both bounds are passed
                # explicitly: uniform(x) alone means low=x, high=1.0.
                left = random.uniform(0, width - w)
                top = random.uniform(0, height - h)

                # Patch as [min_x, min_y, max_x, max_y] in integer pixels.
                rect = np.array([int(left), int(top),
                                 int(left + w), int(top + h)])

                # IoU between every ground-truth box and the patch.
                overlap = jaccard_numpy(boxes, rect)

                # Reject the patch when no box reaches min_iou, or when
                # every box exceeds max_iou. (The previous `and` form
                # could never trigger because max_iou is always +inf for
                # the configured options, so min_iou was never enforced.)
                if overlap.max() < min_iou or overlap.min() > max_iou:
                    continue

                # Keep only boxes whose centre lies strictly inside the patch.
                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
                m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])
                m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])
                mask = m1 * m2

                # No box centre inside the patch: try again.
                if not mask.any():
                    continue

                current_image = image[rect[1]:rect[3], rect[0]:rect[2], :]
                current_boxes = boxes[mask, :].copy()
                current_labels = labels[mask]

                # Clip the boxes to the patch, then translate them so the
                # patch's top-left corner becomes the origin.
                current_boxes[:, :2] = np.maximum(current_boxes[:, :2],
                                                  rect[:2])
                current_boxes[:, :2] -= rect[:2]
                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:],
                                                  rect[2:])
                current_boxes[:, 2:] -= rect[:2]

                return current_image, current_boxes, current_labels

 

  • 4
    点赞
  • 17
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值