二、深度学习数据增强方法汇总

YAYA视觉

已于 2022-07-18 11:44:20 修改

阅读量1.5k

点赞数 4

分类专栏：深度学习加载数据与文件处理 Python 文章标签：深度学习 pytorch 神经网络

于 2021-06-26 17:56:37 首次发布

本文链接：https://blog.csdn.net/Freeloop_hg/article/details/118252227

版权

深度学习同时被 3 个专栏收录

14 篇文章 0 订阅

订阅专栏

Python

9 篇文章 0 订阅

订阅专栏

加载数据与文件处理

6 篇文章 2 订阅

订阅专栏

深度学习模型训练数据增强方法汇总

一、随机裁剪
二、RGB-->BGR通道互换
三、仿射变换（缩放）
四、图片亮度、对比度调节
五、随机抠图
六、bounding box 中心点随机抠图
七、随机缩放
八、椒盐噪声
九、图片模糊处理
十、图片质量压缩
十一、随机遮挡（随机擦除）
十二、图片转灰度图
十三、随机旋转
后面会增加一些非线性的数据增强的方法。。。。。。。。

一、随机裁剪

class RandomResize(object):
    """Resize a sample by a random factor bounded by a fixed 768-pixel budget.

    The upper bound of the factor is the largest scale for which the short
    side stays within ``image_short_size`` and the long side within
    ``image_max_size`` (capped at 2.5); the lower bound is 60% of that.
    """

    def __init__(self, cfg):
        # ``cfg`` is accepted but not read; both limits are fixed here.
        self.image_short_size = 768
        self.image_max_size = 768

    def __call__(self, data_dict):
        """Scale image, miss-mask, boxes and keypoints by one random factor.

        Reads the keys ``image``, ``bboxes``, ``keypoints``, ``availability``,
        ``mask_miss`` and ``crop_bbox_idx`` and returns the same dict with the
        first five replaced.
        """
        image = data_dict["image"]
        boxes = data_dict["bboxes"]
        points = data_dict["keypoints"]
        visible = data_dict["availability"]
        miss_mask = data_dict["mask_miss"]
        data_dict["crop_bbox_idx"]  # only checked for presence; the value is unused

        # Work on float copies so the caller's arrays are left untouched.
        boxes = boxes.astype(np.float32).copy()
        points = points.astype(np.float32).copy()
        visible = visible.copy()

        short_side = min(image.shape[:2])
        long_side = max(image.shape[:2])
        upper = min(self.image_short_size / short_side, self.image_max_size / long_side)
        upper = min(2.5, upper)
        lower = .6 * upper
        factor = np.random.uniform(lower, upper)

        resized_image = cv2.resize(image, (0, 0), fx=factor, fy=factor)
        resized_mask = cv2.resize(miss_mask, (0, 0), fx=factor, fy=factor)
        boxes[:, :4] *= factor
        points *= factor

        data_dict["image"] = resized_image
        data_dict["bboxes"] = boxes
        data_dict["keypoints"] = points
        data_dict["availability"] = visible
        data_dict["mask_miss"] = resized_mask

        return data_dict

二、RGB–>BGR通道互换

第一种方案，已经试验过，有效

class RandomBGRRGBInverse(object):
    """Reverse the channel axis of ``data_dict["image"]`` in about half of the calls."""

    def __init__(self, cfg):
        """Accept ``cfg`` for a uniform constructor signature; nothing is read from it."""

    def __call__(self, data_dict):
        """Return ``data_dict``, with the image's last axis reversed when the draw exceeds 0.5."""
        keep_order = np.random.random() <= .5
        if keep_order:
            return data_dict
        reversed_view = data_dict["image"][:, :, ::-1]
        # Copy so the stored image is a contiguous array rather than a negative-stride view.
        data_dict["image"] = reversed_view.copy()
        return data_dict

第二种方案，已经试验过，有效

# Random channel permutation
class ShuffleRgb(object):
    """Randomly permute the three image channels, or replace them by a grey image."""

    # Channel orders for draws 0-4, as indices into the result of ``cv2.split``.
    _ORDERS = {
        0: (0, 2, 1),
        1: (1, 0, 2),
        2: (1, 2, 0),
        3: (2, 0, 1),
        4: (2, 1, 0),
    }

    def __init__(self,probability):
        """Store the probability with which a shuffle is attempted."""
        self.probability = probability

    def __call__(self, data_dict):
        """Return ``data_dict`` with ``image`` possibly replaced by a channel-shuffled copy."""
        source = data_dict["image"]
        shuffled = source
        if random.uniform(0, 1) < self.probability:
            choice = random.randint(0, 5)
            first, second, third = cv2.split(source)
            planes = (first, second, third)
            if choice in self._ORDERS:
                shuffled = cv2.merge([planes[i] for i in self._ORDERS[choice]])
            elif choice == 5:
                # Draw 5 collapses the image to grey and expands it back to three channels.
                grey = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)
                shuffled = cv2.cvtColor(grey, cv2.COLOR_GRAY2RGB)

        data_dict["image"] = shuffled
        return data_dict

三、仿射变换（缩放）

注意：config.TRAIN.TRANSFORM_PARAMS.max_affine_xy_ratio=0.3，如果设得太大，内容会丢失，所以只支持缩小。

class RandomAffineTransform(object):
    """Warp the sample with a random perspective map that pulls the corners inwards."""

    def __init__(self, config):
        """Read the maximum corner displacement, as a fraction of the image size."""
        self.max_affine_xy_ratio = config.TRAIN.TRANSFORM_PARAMS.max_affine_xy_ratio

    def __call__(self, data_dict):
        """Warp image, miss-mask and keypoints; return the updated ``data_dict``.

        The dict is only modified when ``keypoints.shape[1] >= 1``.
        NOTE(review): ``bboxes`` are written back unwarped — confirm that
        downstream code does not expect them to follow the image.
        """
        image = data_dict["image"]
        boxes = data_dict["bboxes"]
        points = data_dict["keypoints"]

        height, width, _ = image.shape
        try:
            assert height >0 and width > 0
        except Exception as err:
            # A degenerate image is only logged; processing continues.
            import logging
            logging.exception(err)
        corners = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]])

        # Exclusive upper bounds of the per-corner displacement; fall back to
        # the full side length when the ratio rounds down to zero.
        limit_y = int(self.max_affine_xy_ratio * height)
        if limit_y < 1:
            limit_y = height
        limit_x = int(self.max_affine_xy_ratio * width)
        if limit_x < 1:
            limit_x = width

        def jitter_x():
            return np.random.randint(0, limit_x)

        def jitter_y():
            return np.random.randint(0, limit_y)

        # Draw order (x then y for each corner, clockwise from top-left) is
        # kept fixed so a seeded run reproduces the same warp.
        tl_x = jitter_x()
        tl_y = jitter_y()
        tr_x = width - 1 - jitter_x()
        tr_y = jitter_y()
        br_x = width - 1 - jitter_x()
        br_y = height - 1 - jitter_y()
        bl_x = jitter_x()
        bl_y = height - 1 - jitter_y()
        moved_corners = np.array([[tl_x, tl_y], [tr_x, tr_y], [br_x, br_y], [bl_x, bl_y]])
        homography, _ = cv2.findHomography(corners, moved_corners, cv2.LMEDS)

        if not keypoints_present(points):
            return data_dict

        visible = data_dict["availability"]
        miss_mask = data_dict["mask_miss"]
        assert boxes.shape.__len__() == 2
        assert boxes.shape[1] == 4
        assert points.shape.__len__() == 3
        assert points.shape[2] == 2
        assert visible.shape.__len__() == 2

        out_size = (image.shape[1], image.shape[0])
        warped_image = cv2.warpPerspective(image, homography, out_size)
        warped_mask = cv2.warpPerspective(miss_mask, homography, out_size)
        warped_points = cv2.perspectiveTransform(points, homography)

        data_dict["image"] = warped_image
        data_dict["bboxes"] = boxes
        data_dict["keypoints"] = warped_points.reshape(points.shape)
        data_dict["availability"] = visible
        data_dict["mask_miss"] = warped_mask

        return data_dict


def keypoints_present(points):
    """Return True when the keypoint array has at least one entry along axis 1."""
    return points.shape[1] >= 1

四、图片亮度、对比度调节

提示：第一种，只有亮度变化，已经实验过，有效

class brightnessJust():
    """Brighten the image by blending it with a black image plus a random offset."""

    def __init__(self, config):
        """Read the blend weights from ``config.BrightnessJust``.

        ``beta`` falls back to ``alpha`` when the config has no ``beta`` entry,
        which is the value the previous implementation always used.
        """
        self.alpha=config.BrightnessJust.alpha
        # Previously this read ``.alpha`` a second time (copy-paste slip).
        self.beta=getattr(config.BrightnessJust, "beta", self.alpha)

    def __call__(self, data_dict):
        """Replace ``data_dict["image"]`` by ``alpha * image + beta * 0 + gamma``.

        ``gamma`` is drawn from ``np.random.randint(0, 127)``.
        """
        image = data_dict["image"]
        gamma=np.random.randint(0,127)
        # All-zero second operand with the same shape and dtype as the image,
        # so greyscale or non-uint8 inputs no longer fail on a shape/dtype mismatch.
        blank = np.zeros_like(image)
        data_dict["image"]=cv2.addWeighted(image,self.alpha,blank,self.beta,gamma)

        return data_dict

提示：第二种，RGB空间亮度、对比度调节，已经实验过，有效

class RandomBrightnessAndContrast(object):
    """Apply ``gain * image + offset`` with random gain (contrast) and offset (brightness)."""

    def __init__(self, probability=0.1,alpha=(7,14), beta=(-20,20)):
        """Store the draw ranges.

        ``alpha`` bounds the integer that is divided by 10 to give the gain;
        ``beta`` bounds the integer offset.
        """
        self.probability = probability
        self.alpha_lower, self.alpha_upper = alpha[0], alpha[1]
        self.beta_lower, self.beta_upper = beta[0], beta[1]
        self.channel_process = True  # apply the formula channel by channel

    def __call__(self, data_dict):
        """Return ``data_dict``; with probability ``probability`` the image is adjusted to uint8."""
        pixels = data_dict['image']
        if not random.uniform(0, 1) < self.probability:
            return data_dict

        pixels = np.float32(pixels)
        gain = random.randint(self.alpha_lower, self.alpha_upper) / 10
        offset = random.randint(self.beta_lower, self.beta_upper)
        if self.channel_process:
            plane_b, plane_g, plane_r = cv2.split(pixels)
            plane_b = gain * plane_b + offset
            plane_g = gain * plane_g + offset
            plane_r = gain * plane_r + offset
            adjusted = cv2.merge([plane_b, plane_g, plane_r])
        else:
            adjusted = gain * pixels + offset

        # Clamp to the displayable range before converting back to bytes.
        data_dict['image'] = np.uint8(np.clip(adjusted, 0, 255))
        return data_dict

提示：第三种，RGB空间亮度、对比度调节，基于像素点的调节。已经实验过，有效，但是特别慢，建议改成矩阵运算

class RandomBrightnessAndContrastPixel(object):
    """Apply ``0.1 * cnum * image + bnum`` with random contrast and brightness terms."""

    def __init__(self,probability=0.1):
        """Store the probability with which the adjustment is applied."""
        self.probability=probability

    def __call__(self, data_dict):
        """Return ``data_dict``; when applied, ``image`` becomes a uint8 array of the same shape.

        ``cnum`` (contrast) is drawn from ``random.randint(10, 20)`` and
        ``bnum`` (brightness) from ``random.randint(10, 100)``.
        """
        if random.uniform(0, 1) < self.probability:
            image=data_dict['image']
            cnum=random.randint(10,20)
            bnum=random.randint(10,100)
            # The former per-pixel loop rebound the output to a single pixel on
            # every iteration, so only the last pixel was returned. Computing
            # on the whole array fixes that and removes the Python-level loop.
            adjusted = 0.1 * cnum * np.asarray(image, dtype=np.float64) + bnum
            data_dict['image'] = np.clip(adjusted, 0, 255).astype(np.uint8)

        return data_dict

五、随机抠图

class RandomCrop(object):
    """Cut a fixed-size window around a uniformly random centre inside the image.

    Parts of the window that fall outside the image are filled with
    ``pad_value`` (image) or 0 (miss-mask).
    """

    def __init__(self, cfg):
        """Read crop size, padding value and perturbation limit from ``cfg.TRAIN.TRANSFORM_PARAMS``."""
        self.center_perterb_max = cfg.TRAIN.TRANSFORM_PARAMS.center_perterb_max  # type: float
        self.crop_size_x = cfg.TRAIN.TRANSFORM_PARAMS.crop_size_x
        self.crop_size_y = cfg.TRAIN.TRANSFORM_PARAMS.crop_size_y
        self.pad_value = cfg.TRAIN.TRANSFORM_PARAMS.PAD_VALUE

    def __call__(self, data_dict):
        """Crop image and mask, shift boxes/keypoints, and clear availability outside the window."""
        image = data_dict["image"]
        boxes = data_dict["bboxes"]
        points = data_dict["keypoints"]
        visible = data_dict["availability"]
        miss_mask = data_dict["mask_miss"]
        anchor_idx = data_dict["crop_bbox_idx"]

        boxes, points, visible = boxes.copy(), points.copy(), visible.copy()

        img_h, img_w = image.shape[0], image.shape[1]
        out_w, out_h = self.crop_size_x, self.crop_size_y

        # x is drawn before y so a seeded run picks the same centre.
        centre_x = int(np.round(np.random.randint(0, img_w)))
        centre_y = int(np.round(np.random.randint(0, img_h)))

        def window(centre, size, limit):
            """Return (unclipped origin, clipped start, clipped end) along one axis."""
            half = size // 2
            origin = centre - half
            return origin, max(origin, 0), min(centre + half, limit)

        shift_x, src_x0, src_x1 = window(centre_x, out_w, img_w)
        shift_y, src_y0, src_y1 = window(centre_y, out_h, img_h)

        patch = image[src_y0:src_y1, src_x0:src_x1]
        canvas = np.ones(shape=(out_h, out_w, image.shape[2]), dtype=np.float32) * self.pad_value
        # Position of the clipped patch inside the padded output.
        dst_x0 = src_x0 - shift_x
        dst_y0 = src_y0 - shift_y
        dst_x1 = dst_x0 + patch.shape[1]
        dst_y1 = dst_y0 + patch.shape[0]
        canvas[dst_y0:dst_y1, dst_x0:dst_x1] = patch

        mask_patch = miss_mask[src_y0:src_y1, src_x0:src_x1]
        mask_canvas = np.zeros(shape=(out_h, out_w), dtype=np.float32)
        mask_canvas[dst_y0:dst_y1, dst_x0:dst_x1] = mask_patch

        # Express annotations in the coordinate frame of the output window.
        boxes[:, (0, 2)] -= shift_x
        boxes[:, (1, 3)] -= shift_y
        points[:, :, 0] -= shift_x
        points[:, :, 1] -= shift_y
        for row, col in np.ndindex(*points.shape[:2]):
            x, y = points[row, col]
            if not (0 <= x < canvas.shape[1] and 0 <= y < canvas.shape[0]):
                visible[row, col] = 0

        data_dict["image"] = canvas
        data_dict["bboxes"] = boxes
        data_dict["keypoints"] = points
        data_dict["availability"] = visible
        data_dict["crop_bbox_idx"] = anchor_idx  # to generate mask.
        data_dict["mask_miss"] = mask_canvas
        return data_dict

六、bounding box 中心点随机抠图

class RandomCenterCrop(object):
    """Cut a fixed-size window centred on a randomly perturbed bounding-box centre.

    Parts of the window that fall outside the image are filled with
    ``pad_value`` (image) or 0 (miss-mask).
    """

    def __init__(self, cfg):
        """Read crop size, padding value and perturbation limit from ``cfg.TRAIN.TRANSFORM_PARAMS``."""
        self.center_perterb_max = cfg.TRAIN.TRANSFORM_PARAMS.center_perterb_max  # type: float
        self.crop_size_x = cfg.TRAIN.TRANSFORM_PARAMS.crop_size_x
        self.crop_size_y = cfg.TRAIN.TRANSFORM_PARAMS.crop_size_y
        self.pad_value = cfg.TRAIN.TRANSFORM_PARAMS.PAD_VALUE

    def __call__(self, data_dict):
        """Crop around box ``crop_bbox_idx`` and shift all annotations into the window.

        The centre of ``bboxes[crop_bbox_idx]`` is moved by up to
        ``center_perterb_max`` pixels per axis. Keypoints that end up outside
        the output window get availability 0.
        """
        img_ori = data_dict["image"]
        bboxes = data_dict["bboxes"]
        keypoints = data_dict["keypoints"]
        availability = data_dict["availability"]
        mask_miss = data_dict["mask_miss"]
        bbox_idx = data_dict["crop_bbox_idx"]

        # Copies keep the caller's arrays untouched.
        bboxes = bboxes.copy()
        keypoints = keypoints.copy()
        availability = availability.copy()

        bbox = bboxes[bbox_idx]
        center_x = .5 * (bbox[0] + bbox[2])
        center_y = .5 * (bbox[1] + bbox[3])
        center_x += (np.random.random() * 2 - 1) * self.center_perterb_max
        center_y += (np.random.random() * 2 - 1) * self.center_perterb_max

        center_x = int(np.round(center_x))
        center_y = int(np.round(center_y))

        half_x = self.crop_size_x // 2
        half_y = self.crop_size_y // 2

        # Top-left corner of the (unclipped) window in image coordinates.
        offset_x = center_x - half_x
        offset_y = center_y - half_y

        start_x = max(offset_x, 0)
        start_y = max(offset_y, 0)

        # Clamp the end to the start: a window lying just outside the top/left
        # edge used to produce a negative end index, which Python slicing
        # interprets as "count from the end" and which then failed with a
        # shape mismatch when pasting into the padded output.
        end_x = max(min(center_x + half_x, img_ori.shape[1]), start_x)
        end_y = max(min(center_y + half_y, img_ori.shape[0]), start_y)

        image_cropped = img_ori[start_y:end_y, start_x:end_x]
        image_cropped_padded = np.ones(shape=(self.crop_size_y, self.crop_size_x, img_ori.shape[2]), dtype=np.float32) * self.pad_value
        dst_start_x = start_x - offset_x
        dst_start_y = start_y - offset_y
        dst_end_x = dst_start_x + image_cropped.shape[1]
        dst_end_y = dst_start_y + image_cropped.shape[0]
        image_cropped_padded[dst_start_y:dst_end_y, dst_start_x:dst_end_x] = image_cropped

        mask_miss_cropped = mask_miss[start_y:end_y, start_x:end_x]
        mask_miss_cropped_padded = np.zeros(shape=(self.crop_size_y, self.crop_size_x), dtype=np.float32)
        mask_miss_cropped_padded[dst_start_y:dst_end_y, dst_start_x:dst_end_x] = mask_miss_cropped

        bboxes[:, (0, 2)] -= offset_x
        bboxes[:, (1, 3)] -= offset_y
        keypoints[:, :, 0] -= offset_x
        keypoints[:, :, 1] -= offset_y

        # Vectorised replacement for the per-keypoint Python loop.
        kp_x = keypoints[:, :, 0]
        kp_y = keypoints[:, :, 1]
        inside = (
            (kp_x >= 0) & (kp_x < image_cropped_padded.shape[1])
            & (kp_y >= 0) & (kp_y < image_cropped_padded.shape[0])
        )
        # Index through a view limited to the keypoint grid so only the
        # entries matching a keypoint are touched.
        availability[:inside.shape[0], :inside.shape[1]][~inside] = 0

        data_dict["image"] = image_cropped_padded
        data_dict["bboxes"] = bboxes
        data_dict["keypoints"] = keypoints
        data_dict["availability"] = availability
        data_dict["crop_bbox_idx"] = bbox_idx  # to generate mask.
        data_dict["mask_miss"] = mask_miss_cropped_padded
        return data_dict

七、随机缩放

class RandomScale(object):
    """Rescale the sample in about half of the calls, relative to the selected box height."""

    def __init__(self, cfg):
        """Read the scale range, reference height and target distance from the config."""
        params = cfg.TRAIN.TRANSFORM_PARAMS
        self.scale_min = params.scale_min
        self.scale_max = params.scale_max
        self.crop_size_y = params.resize_base_size_y
        self.target_dist = params.target_dist

    def __call__(self, data_dict):
        """Resize image and miss-mask and scale boxes/keypoints by the same factor.

        When the first random draw exceeds 0.5 the factor is
        ``target_dist / (box_height / crop_size_y) * multiplier`` with
        ``multiplier`` uniform in ``[scale_min, scale_max)``; otherwise it is 1.
        """
        image = data_dict["image"]
        boxes = data_dict["bboxes"]
        points = data_dict["keypoints"]
        visible = data_dict["availability"]
        miss_mask = data_dict["mask_miss"]
        anchor = data_dict["crop_bbox_idx"]

        # Float copies: the caller's arrays stay untouched.
        boxes = boxes.astype(np.float32).copy()
        points = points.astype(np.float32).copy()
        visible = visible.copy()

        factor = 1
        if np.random.random() > .5:
            multiplier = np.random.random() * (self.scale_max - self.scale_min) + self.scale_min
            relative_height = (boxes[anchor][3] - boxes[anchor][1]) / self.crop_size_y
            normalising = self.target_dist / relative_height
            factor = normalising * multiplier

        resized_image = cv2.resize(image, (0, 0), fx=factor, fy=factor)
        resized_mask = cv2.resize(miss_mask, (0, 0), fx=factor, fy=factor)
        boxes[:, :4] *= factor
        points *= factor

        data_dict["image"] = resized_image
        data_dict["bboxes"] = boxes
        data_dict["keypoints"] = points
        data_dict["availability"] = visible
        data_dict["mask_miss"] = resized_mask
        return data_dict

八、椒盐噪声

提示：椒盐噪声，噪点是在（0-255）之间随机生成。能有效使用

class pepper_salt_noise(object):
    """Sprinkle black and white pixels over ``data_dict['image']``."""

    def __init__(self,probability):
        """Store the probability of entering the noise branch."""
        self.probability = probability

    def __call__(self, data_dict):
        """Return ``data_dict``; the image array may be modified in place.

        Inside the noise branch a second coin flip decides whether any
        pixels are actually written, so noise is added in roughly half of
        the "applied" calls. At most 1.7% of the pixels are targeted.
        """
        canvas = data_dict['image']
        if random.uniform(0, 1) < self.probability:
            fraction = random.uniform(0, 0.017)
            rows, cols = canvas.shape[0], canvas.shape[1]
            count = int(fraction * rows * cols)
            if random.uniform(0, 1) < 0.5:
                for _ in range(count):
                    row = random.randint(0, rows - 1)
                    col = random.randint(0, cols - 1)
                    pepper = random.uniform(0, 1) < 0.5
                    canvas[row, col] = [0, 0, 0] if pepper else [255, 255, 255]

        data_dict['image'] = canvas
        return data_dict

九、图片模糊处理

提示：能有效使用

class PictureBlur(object):
    """Blur the image with a randomly chosen 5x5 filter.

    Filtering makes the image softer; mean, median and Gaussian blur are
    used here. Kernel sizes are conventionally odd: (3,3), (5,5), (7,7).
    """

    def __init__(self,probability):
        """Store the probability of entering the blur branch."""
        self.probability = probability

    def __call__(self, data_dict):
        """Return ``data_dict`` with ``image`` possibly replaced by a blurred copy.

        The filter index is drawn from ``np.random.randint(0, 4)``:
        0 = mean blur, 1 = median blur, 3 = Gaussian blur, 2 = leave unchanged.
        """
        picture = data_dict['image']
        if random.uniform(0,1) < self.probability:
            mode = np.random.randint(0, 4)
            if mode == 0:
                picture = cv2.blur(picture, (5, 5))
            elif mode == 1:
                picture = cv2.medianBlur(picture, 5)
            elif mode == 3:
                picture = cv2.GaussianBlur(picture, (5, 5), 0)

        data_dict['image'] = picture
        return data_dict

十、图片质量压缩

提示：可以采用多种插值方式；本例先用 INTER_AREA 缩小、再用 INTER_LINEAR 放大回去，以此降低图片质量。能有效使用

class PictureCompression(object):
    """Degrade image quality by shrinking the image and scaling it back up."""

    def __init__(self, probability):
        """Store the probability with which the degradation is applied."""
        self.probability = probability

    def __call__(self, data_dict):
        """Return ``data_dict``; when applied, ``image`` is a blurred copy of identical size.

        The shrink factor is ``random.randint(1, 10) / 10``. It is drawn
        before the probability check, as in the previous implementation, so
        the random stream is consumed the same way.
        """
        compress_rate = random.randint(1,10)/10
        if random.uniform(0,1)<self.probability:
            image = data_dict['image']
            height, width = image.shape[:2]
            # cv2.resize expects dsize as (width, height). The previous code
            # passed (height, width), which distorted non-square images and
            # could return a size different from the input. Clamp to 1 pixel
            # so small images never request an empty target.
            small_size = (max(1, int(width * compress_rate)),
                          max(1, int(height * compress_rate)))
            # Area interpolation for shrinking.
            shrunk = cv2.resize(image, small_size, interpolation=cv2.INTER_AREA)
            # Bilinear interpolation back to exactly the original size, so
            # keypoints and boxes stay aligned with the pixels.
            data_dict['image'] = cv2.resize(shrunk, (width, height),
                                            interpolation=cv2.INTER_LINEAR)

        return data_dict

十一、随机遮挡（随机擦除）

提示：随机遮挡，能有效使用

class RandomErasing(object):
    """Randomly select rectangular regions of an image and overwrite their pixels.

    'Random Erasing Data Augmentation' by Zhong et al.
    See https://arxiv.org/pdf/1708.04896.pdf

    Args:
         probability: Probability that the erasing operation is performed.
         sl: Minimum proportion of erased area against input image.
         sh: Maximum proportion of erased area against input image.
         r1: Minimum aspect ratio of erased area (the maximum is ``1 / r1``).
         mean: Stored on the instance; not read by ``__call__``.
    """

    def __init__(self, probability=0.1, sl=0.002, sh=0.05, r1=0.3, mean=(0.485, 0.456, 0.406)):
        self.probability = probability
        self.mean = mean
        self.sl = sl
        self.sh = sh
        self.r1 = r1
        # When True, a patch is filled with its own mean colour instead of a random one.
        self.area_mean_roi = False

    def __call__(self, data_dict):
        """Erase between one and five patches in a copy of ``data_dict['image']``.

        The image is indexed as (h, w, c). The input array itself is never
        modified; the dict entry is replaced only when erasing is applied.
        """
        canvas = copy.deepcopy(data_dict['image'])
        if not random.uniform(0, 1) < self.probability:
            return data_dict

        patch_count = random.randint(1, 5)
        for _ in range(patch_count):
            total_area = canvas.shape[0] * canvas.shape[1]

            # Sample the patch area and aspect ratio.
            patch_area = random.uniform(self.sl, self.sh) * total_area
            ratio = random.uniform(self.r1, 1 / self.r1)

            patch_h = int(round(math.sqrt(patch_area * ratio)))
            patch_w = int(round(math.sqrt(patch_area / ratio)))

            # Skip patches that would not fit strictly inside the image.
            if not (patch_w < canvas.shape[1] and patch_h < canvas.shape[0]):
                continue

            top = random.randint(0, canvas.shape[0] - patch_h)
            left = random.randint(0, canvas.shape[1] - patch_w)
            rows = slice(top, top + patch_h)
            cols = slice(left, left + patch_w)

            if self.area_mean_roi:
                # Fill with the per-channel mean of the patch.
                fill = np.mean(canvas[rows, cols, :], axis=(0, 1))
            else:
                # Fill with one random value per channel.
                fill_b = random.uniform(0, 255)
                fill_g = random.uniform(0, 255)
                fill_r = random.uniform(0, 255)
                fill = [fill_b, fill_g, fill_r]

            if canvas.shape[2] == 3:
                for channel in range(3):
                    canvas[rows, cols, channel] = fill[channel]
            else:
                canvas[rows, cols] = fill[0]

        data_dict['image'] = canvas
        return data_dict

十二、图片转灰度图

提示：这部分数据增强的占比可以少点，能有效使用

class RGB2GRAY(object):
    """Replace the image by its grey version, repeated over three channels."""

    def __init__(self, probability=0.1):
        """Store the probability with which the conversion is applied."""
        self.probability = probability

    def __call__(self, data_dict):
        """Return ``data_dict``; when applied, ``image`` holds three identical grey channels."""
        colour = data_dict['image']
        if not random.uniform(0, 1) < self.probability:
            return data_dict

        grey = cv2.cvtColor(colour, cv2.COLOR_RGB2GRAY)
        # Stack to three channels so the result can still be fed to the network.
        data_dict['image'] = np.stack((grey, grey, grey), axis=2)
        return data_dict

十三、随机旋转

提示：1.config.TRAIN.TRANSFORM_PARAMS.max_rotation_degree=40;2.data_dict['image']采用cv读图；3.data_dict["keypoints"]的形状为[1,N,2],N代表多少个关键点。目前在关键点检测能正常使用。能有效使用

def rotate_bound(image, angle):
    """Rotate ``image`` by ``angle`` degrees about its centre on an enlarged canvas.

    The output size is the bounding size of the rotated image, and the
    rotation matrix is shifted so the result is centred on that canvas.
    """
    height, width = image.shape[:2]
    centre = (width // 2, height // 2)

    matrix = cv2.getRotationMatrix2D(centre, angle, 1.0)
    abs_cos = np.abs(matrix[0, 0])
    abs_sin = np.abs(matrix[0, 1])

    # Bounding dimensions of the rotated image.
    out_w = int((height * abs_sin) + (width * abs_cos))
    out_h = int((height * abs_cos) + (width * abs_sin))

    # Translate so the old centre maps to the centre of the new canvas.
    matrix[0, 2] += (out_w / 2) - centre[0]
    matrix[1, 2] += (out_h / 2) - centre[1]

    return cv2.warpAffine(image, matrix, (out_w, out_h))
 
 #start random rotate
def _rotate_sample_bound(image, mask, points, angle):
    """Rotate image and mask on an enlarged canvas and map ``points`` with the same matrix.

    Args:
        image: array indexed as (h, w[, c]).
        mask: array warped with the same matrix and output size as ``image``.
        points: array of shape (K, 2) holding (x, y) coordinates.
        angle: rotation angle in degrees, passed to ``cv2.getRotationMatrix2D``.

    Returns:
        ``(image_rotated, mask_rotated, points_rotated, M)`` where ``M`` is
        the 2x3 affine matrix that was applied.
    """
    h, w = image.shape[:2]
    cX, cY = w // 2, h // 2

    M = cv2.getRotationMatrix2D((cX, cY), angle, 1.0)
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])

    # Bounding dimensions of the rotated image.
    nW = int((h * sin) + (w * cos))
    nH = int((h * cos) + (w * sin))

    # Shift so the rotated content is centred on the enlarged canvas.
    M[0, 2] += (nW / 2) - cX
    M[1, 2] += (nH / 2) - cY

    image_rotated = cv2.warpAffine(image, M, (nW, nH))
    mask_rotated = cv2.warpAffine(mask, M, (nW, nH))

    points_homo = np.ones(shape=(points.shape[0], 3))
    points_homo[:, :2] = points
    points_rotated = points_homo.dot(M.T)

    return image_rotated, mask_rotated, points_rotated, M


class RandomRotate(object):
    """Rotate image, miss-mask, bounding boxes and keypoints by a random angle."""

    def __init__(self, config):
        """Read the symmetric angle range from ``max_rotation_degree``."""
        self.min_angle = -1 * config.TRAIN.TRANSFORM_PARAMS.max_rotation_degree
        self.max_angle = config.TRAIN.TRANSFORM_PARAMS.max_rotation_degree

    def __call__(self, data_dict):
        """Return ``data_dict`` with all spatial entries rotated consistently.

        In about half of the calls the angle is uniform in
        ``[min_angle, max_angle)``; otherwise it is one of 0, 90 or 180.
        Each box is replaced by the axis-aligned box around its rotated corners.
        """
        img_ori = data_dict["image"]
        bbox = data_dict["bboxes"]
        keypoints = data_dict["keypoints"]
        availability = data_dict["availability"]
        mask_miss = data_dict["mask_miss"]
        assert bbox.shape.__len__() == 2
        assert bbox.shape[1] == 4
        assert keypoints.shape.__len__() == 3
        assert keypoints.shape[2] == 2
        assert availability.shape.__len__() == 2

        # The four corners of every box, as (x, y) points.
        kps = np.empty(shape=(len(bbox), 4, 2), dtype=np.float32)
        kps[:, 0, :] = bbox[:, (0, 1)]
        kps[:, 1, :] = bbox[:, (2, 1)]
        kps[:, 2, :] = bbox[:, (2, 3)]
        kps[:, 3, :] = bbox[:, (0, 3)]
        if np.random.random() > .5:
            angle = np.random.uniform(self.min_angle, self.max_angle)
        else:
            angle = [0, 90, 180][np.random.randint(0, 3)]

        # The module-level rotate_bound(image, angle) only rotates one image
        # and returns one value, so calling it with four arguments raised a
        # TypeError. The private helper rotates image and mask together and
        # hands back the matrix needed for the keypoints.
        image_rotated, mask_miss_rotated, kps_rotated, M = _rotate_sample_bound(
            img_ori, mask_miss, kps.reshape((-1, 2)), angle)
        kps_rotated = kps_rotated.reshape(kps.shape)
        bbox_rotated = np.zeros_like(bbox)
        bbox_rotated[:, 0] = kps_rotated[:, :, 0].min(axis=1)
        bbox_rotated[:, 1] = kps_rotated[:, :, 1].min(axis=1)
        bbox_rotated[:, 2] = kps_rotated[:, :, 0].max(axis=1)
        bbox_rotated[:, 3] = kps_rotated[:, :, 1].max(axis=1)

        # Rotate keypoints with the same affine matrix (homogeneous coordinates).
        keypoints_reshapped = keypoints.reshape(-1, 2)
        keypoints_homo = np.ones(shape=(keypoints_reshapped.shape[0], 3))
        keypoints_homo[:, :2] = keypoints_reshapped
        keypoints_rotated = keypoints_homo.dot(M.T)

        data_dict["image"] = image_rotated
        data_dict["bboxes"] = bbox_rotated
        data_dict["keypoints"] = keypoints_rotated.reshape(keypoints.shape)
        data_dict["availability"] = availability
        data_dict["mask_miss"] = mask_miss_rotated

        return data_dict

下面展示一些 内联代码片，后续不断的实验调通，会及时更新…

from __future__ import division
import cv2
from PIL import Image,ImageFilter,ImageEnhance
import numpy as np
from numpy import random
import math
import glob
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
import os

# Names exported by ``from <module> import *``.
# NOTE(review): several entries (e.g. 'RandomHflip', 'Resize', 'Normalize') are not
# defined in the part of the file visible here — confirm they exist further down,
# otherwise a star-import of this module raises AttributeError.
__all__ = ['Compose','RandomHflip', 'RandomUpperCrop', 'Resize', 'UpperCrop', 'RandomBottomCrop',"RandomErasing",
           'BottomCrop', 'Normalize','Normalize_test', 'RandomSwapChannels', 'FixRandomRotate','RandomRotate', 'RandomHShift',"CenterCrop",'RandomBrightnessAndContrastPixel','RandomBrightnessAndContrast',"pepper_salt_noise",'PictureBlur', 'PictureCompression', 'RGB2GRAY',
           'ExpandBorder', 'RandomResizedCrop','RandomTwelveCrop','RandomDownCrop', 'DownCrop', 'ResizedCrop','Lighter']


def ruihua(img):
    """Sharpen ``img`` with a 3x3 kernel and return the result as a PIL image.

    The input is converted with ``np.asarray`` and treated as RGB.
    """
    sharpen_kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]], np.float32)
    bgr = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)
    sharpened = cv2.filter2D(bgr, -1, sharpen_kernel)
    return Image.fromarray(cv2.cvtColor(sharpened, cv2.COLOR_BGR2RGB))


def light_up(img):
    """Brighten ``img`` via ``1.2 * img + 10`` (clipped to 0..255) and return a PIL image.

    NOTE(review): no RGB->BGR conversion is done on input, yet the result is
    converted BGR->RGB, so the caller presumably passes a BGR array — confirm.
    """
    scaled = 1.2 * img + 10
    brightened = np.uint8(np.clip(scaled, 0, 255))
    return Image.fromarray(cv2.cvtColor(brightened, cv2.COLOR_BGR2RGB))


class light_down(object):
    """Darken an image via ``0.6 * img + 10`` after an RGB->BGR conversion."""

    def __init__(self):
        """No configuration is needed."""

    def __call__(self, img):
        """Return the darkened image as a uint8 array in BGR channel order."""
        bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        darkened = np.clip((0.6 * bgr + 10), 0, 255)
        return np.uint8(darkened)

# def Laplas(img):
#     src = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)
#
#     dst1 = cv2.pyrDown(src)
#
#     dst2 = cv2.pyrDown(dst1)
#     dst3 = cv2.pyrDown(dst2)
#
#     dst4 = cv2.pyrUp(dst3)
#     dst5 = cv2.pyrUp(dst4)
#     dst6 = cv2.pyrUp(dst5)
#
#     h1, w1, c1 = dst6.shape
#     src = cv2.resize(src, (w1, h1))
#     dst7 = src - dst6
#     image = Image.fromarray(cv2.cvtColor(dst7, cv2.COLOR_BGR2RGB))
#     return image


def rotate_nobound(image, angle, center=None, scale=1.):
    """Rotate ``image`` by ``angle`` degrees, keeping the original canvas size.

    ``center`` defaults to the image centre ``(w // 2, h // 2)``; ``scale``
    is passed to ``cv2.getRotationMatrix2D``.
    """
    height, width = image.shape[:2]
    pivot = (width // 2, height // 2) if center is None else center

    matrix = cv2.getRotationMatrix2D(pivot, angle, scale)
    return cv2.warpAffine(image, matrix, (width, height))

def scale_down(src_size, size):
    """Shrink ``size`` (w, h) so it fits inside ``src_size`` (w, h), keeping its aspect ratio.

    Returns the fitted ``(w, h)`` truncated to ints; a size that already fits
    is returned unchanged.
    """
    fit_w, fit_h = size
    avail_w, avail_h = src_size
    if avail_h < fit_h:
        # Too tall: scale the width by the same ratio, then cap the height.
        fit_w = float(fit_w * avail_h) / fit_h
        fit_h = avail_h
    if avail_w < fit_w:
        # Too wide: scale the height by the same ratio, then cap the width.
        fit_h = float(fit_h * avail_w) / fit_w
        fit_w = avail_w
    return int(fit_w), int(fit_h)


def fixed_crop(src, x0, y0, w, h, size=None):
    """Cut the ``w`` x ``h`` window at ``(x0, y0)`` and optionally resize it to ``size``.

    The window is resized with bicubic interpolation only when ``size`` is
    given and differs from ``(w, h)``.
    """
    window = src[y0:y0 + h, x0:x0 + w]
    needs_resize = size is not None and (w, h) != size
    if not needs_resize:
        return window
    return cv2.resize(window, (size[0], size[1]), interpolation=cv2.INTER_CUBIC)


def center_crop(src, size):
    """Crop the largest centred window with the aspect ratio of ``size`` and resize it to ``size``."""
    height, width = src.shape[:2]
    crop_w, crop_h = scale_down((width, height), size)

    # Equal margins on both sides.
    left = int((width - crop_w) / 2)
    top = int((height - crop_h) / 2)

    return fixed_crop(src, left, top, crop_w, crop_h, size)


def bottom_crop(src, size):
    """Like a centre crop, but the window is placed at 75% of the free vertical space."""
    height, width = src.shape[:2]
    crop_w, crop_h = scale_down((width, height), size)

    left = int((width - crop_w) / 2)
    # Three quarters of the vertical slack go above the window.
    top = int((height - crop_h) * 0.75)

    return fixed_crop(src, left, top, crop_w, crop_h, size)

def rotate_bound(image, angle):
    """Rotate ``image`` by ``angle`` degrees about its centre without cutting off corners.

    The canvas grows to the bounding size of the rotated image and the
    rotation matrix is translated so the content stays centred.
    """
    img_h, img_w = image.shape[:2]
    mid_x, mid_y = img_w // 2, img_h // 2

    rotation = cv2.getRotationMatrix2D((mid_x, mid_y), angle, 1.0)
    cos_abs = np.abs(rotation[0, 0])
    sin_abs = np.abs(rotation[0, 1])

    # Size of the canvas that encloses the rotated image.
    bound_w = int((img_h * sin_abs) + (img_w * cos_abs))
    bound_h = int((img_h * cos_abs) + (img_w * sin_abs))

    # Account for the shift of the centre on the larger canvas.
    rotation[0, 2] += (bound_w / 2) - mid_x
    rotation[1, 2] += (bound_h / 2) - mid_y

    return cv2.warpAffine(image, rotation, (bound_w, bound_h))


class Compose(object):
    """Chain several transforms into a single callable."""

    def __init__(self, transforms):
        """Store the iterable of callables, applied in iteration order."""
        self.transforms = transforms

    def __call__(self, img):
        """Feed ``img`` through every transform and return the final result."""
        result = img
        for transform in self.transforms:
            result = transform(result)
        return result


class pepper_salt_noise(object):
    """Add salt-and-pepper noise to an image array."""

    def __init__(self):
        pass

    def __call__(self, image):
        """Return the RGB->BGR converted image with up to 1.7% of its pixels set to black or white.

        ``random`` is ``numpy.random`` in this file, whose ``randint(low, high)``
        excludes ``high``.
        """
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        rows, cols = image.shape[0], image.shape[1]
        percetage = random.uniform(0, 0.017)
        Noisenum = int(percetage * rows * cols)
        for _ in range(Noisenum):
            # The upper bound is exclusive, so pass the full dimension. The
            # previous ``dim - 1`` never touched the last row/column and
            # raised ValueError for a dimension of one pixel.
            randx = random.randint(0, rows)
            randy = random.randint(0, cols)
            if random.uniform(0, 1) < 0.5:
                image[randx, randy] = [0, 0, 0]
            else:
                image[randx, randy] = [255, 255, 255]
        return image


class perspective_transformation(object):
    """Apply a random perspective warp in about half of the calls."""

    # (x range, y range) of each warped corner as a fraction of width/height,
    # in the order top-left, top-right, bottom-right, bottom-left.
    _CORNER_RANGES = (
        ((0.0, 0.2), (0.0, 0.2)),
        ((0.8, 1), (0.0, 0.2)),
        ((0.8, 1), (0.8, 1)),
        ((0.0, 0.2), (0.8, 1)),
    )

    def __init__(self, resize=50):
        # Stored but not used by ``__call__``, which resizes to a fixed (48, 26).
        self.resize = resize

    def __call__(self, image):
        """Return ``image`` unchanged, or warped, RGB->BGR converted and resized to (48, 26)."""
        if not random.random() > 0.5:
            return image

        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        height, width, _ = image.shape

        corners = np.array([[0, 0], [width-1, 0], [width-1, height-1], [0, height-1]], dtype=np.float32)

        # One x and one y fraction per corner, drawn in a fixed order so a
        # seeded run reproduces the same warp.
        targets = []
        for x_range, y_range in self._CORNER_RANGES:
            frac_x = round(random.uniform(x_range[0], x_range[1]), 3)
            frac_y = round(random.uniform(y_range[0], y_range[1]), 3)
            targets.append([width*frac_x, height*frac_y])
        targets = np.array(targets, dtype=np.float32)

        matrix = cv2.getPerspectiveTransform(corners, targets)
        warped = cv2.warpPerspective(image, matrix, (width, height))
        return cv2.resize(warped, (48, 26))


class RandomRotate(object):
    """Rotate an image by a random angle in about half of the calls."""

    def __init__(self, angles, bound=False):
        """Store the ``(low, high)`` angle range and the canvas mode.

        With ``bound=True`` the rotation goes through ``rotate_bound``,
        otherwise through ``rotate_nobound``.
        """
        self.angles = angles
        self.bound = bound

    def __call__(self,img):
        """Return ``img`` unchanged, or rotated by an angle uniform in the configured range."""
        if not random.random() > 0.5:
            return img
        low, high = self.angles[0], self.angles[1]
        angle = np.random.uniform(low, high)
        rotate = rotate_bound if self.bound else rotate_nobound
        return rotate(img, angle)


class dog_way(object):
    """Grey-scale the image, draw a small circle and apply CLAHE, in about half of the calls."""

    def __init__(self):
        """Create the CLAHE operator once so it is reused on every call."""
        self.clahe_block = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(5, 5))

    def __call__(self,img):
        """Return ``img`` unchanged or the processed three-channel grey image."""
        if not random.random() > 0.5:
            return img
        grey = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        grey = cv2.circle(grey, (5, 5), 2, 50, 1)
        # The equalised result is written into ``grey`` itself (second argument is the output).
        self.clahe_block.apply(grey, grey)
        return cv2.cvtColor(grey, cv2.COLOR_GRAY2RGB)


class RandomBrightness(object):
    """Add a random constant to every pixel in about half of the calls."""

    def __init__(self, delta=10):
        """Store the maximum absolute shift; it must lie in 0..255."""
        assert delta >= 0
        assert delta <= 255
        self.delta = delta

    def __call__(self, image):
        """Return ``image`` unchanged, or shifted by a value in ``[-delta, delta)`` and clipped to 0..255."""
        if not random.randint(2):
            return image
        shift = random.uniform(-self.delta, self.delta)
        return (image + shift).clip(0.0, 255.0)


class RandomContrast(object):
    """Scale pixel values by a random factor and clip to [0, 255].

    :param lower: smallest multiplicative factor (must be non-negative).
    :param upper: largest multiplicative factor (must be >= ``lower``).
    """

    def __init__(self, lower=0.9, upper=1.05):
        self.lower, self.upper = lower, upper
        assert upper >= lower, "contrast upper must be >= lower."
        assert lower >= 0, "contrast lower must be non-negative."

    def __call__(self, image):
        # The original author notes that a float image is expected here.
        if not random.randint(2):
            return image

        factor = random.uniform(self.lower, self.upper)
        scaled = image * factor
        return scaled.clip(0.0, 255.0)


class RandomSaturation(object):
    """Randomly scale channel 1 of an image in place.

    :param lower: smallest multiplicative factor (must be non-negative).
    :param upper: largest multiplicative factor (must be >= ``lower``).

    The call multiplies ``image[:, :, 1]`` in place, so the caller's array
    is modified. Presumably the image is HSV with saturation in channel 1
    and a float dtype (an in-place float multiply on an integer array would
    be rejected by numpy) -- confirm against the pipeline that feeds it.
    """

    def __init__(self, lower=0.8, upper=1.2):
        self.lower = lower
        self.upper = upper
        # The messages used to say "contrast" (copy-paste from
        # RandomContrast), which pointed at the wrong transform on failure.
        assert self.upper >= self.lower, "saturation upper must be >= lower."
        assert self.lower >= 0, "saturation lower must be non-negative."

    def __call__(self, image):
        # One-argument randint: 0 or 1 with numpy.random semantics.
        if random.randint(2):
            alpha = random.uniform(self.lower, self.upper)
            image[:, :, 1] *= alpha
        return image
class FixRandomRotate(object):
    """Rotate an image by an angle picked uniformly from a fixed list.

    :param angles: sequence of candidate angles; defaults to
        ``[0, 90, 180, 270]``. A fresh list is created per instance so that
        instances never share (and cannot accidentally mutate) one default.
    :param bound: when truthy the rotation is delegated to ``rotate_bound``,
        otherwise to ``rotate_nobound`` (helpers defined outside this class).
    """

    def __init__(self, angles=None, bound=False):
        self.angles = [0, 90, 180, 270] if angles is None else angles
        self.bound = bound

    def __call__(self, img):
        # Draw the index from the actual number of candidates instead of a
        # hard-coded 4, so custom ``angles`` of any length are sampled fully
        # and never indexed out of range. np.random.randint(n) yields
        # 0..n-1.
        choice = np.random.randint(len(self.angles))
        angle = self.angles[choice]
        if self.bound:
            img = rotate_bound(img, angle)
        else:
            img = rotate_nobound(img, angle)
        return img

class RandomHue(object):
    """Randomly shift channel 0 of an image in place, wrapping at 0 / 360.

    :param delta: maximum absolute shift; must lie in [0, 360].

    The caller's array is modified and also returned.
    """

    def __init__(self, delta=18.0):
        assert 0.0 <= delta <= 360.0
        self.delta = delta

    def __call__(self, image):
        if not random.randint(2):
            return image

        shift = random.uniform(-self.delta, self.delta)
        # Basic slicing yields a view, so every update below writes through
        # to ``image``.
        hue = image[:, :, 0]
        hue += shift
        # Wrap values that left the [0, 360] range back into it.
        hue[hue > 360.0] -= 360.0
        hue[hue < 0.0] += 360.0
        return image


class ConvertColor(object):
    """Convert an image between BGR and HSV using ``cv2.cvtColor``.

    :param current: colour space of the incoming image (``'BGR'`` or ``'HSV'``).
    :param transform: colour space to convert to (``'HSV'`` or ``'BGR'``).
    :raises NotImplementedError: on call, for any pair other than
        BGR->HSV or HSV->BGR.
    """

    def __init__(self, current='BGR', transform='HSV'):
        self.transform = transform
        self.current = current

    def __call__(self, image):
        route = (self.current, self.transform)
        # Reject unsupported conversions before touching OpenCV.
        if route not in (('BGR', 'HSV'), ('HSV', 'BGR')):
            raise NotImplementedError

        if self.current == 'BGR':
            code = cv2.COLOR_BGR2HSV
        else:
            code = cv2.COLOR_HSV2BGR
        return cv2.cvtColor(image, code)

class RandomSwapChannels(object):
    """Randomly permute the three channels along axis 2, half of the time."""

    def __call__(self, img):
        # np.random.randint(2) is 0 or 1; on 0 the image passes through.
        if not np.random.randint(2):
            return img

        channel_order = np.random.permutation(3)
        # Indexing with an index array returns a new array with the channels
        # reordered; the input is not modified.
        return img[:, :, channel_order]

class RandomCrop(object):
    """Crop a randomly positioned window and hand it to ``fixed_crop``.

    :param size: target size, forwarded to ``scale_down`` and ``fixed_crop``
        (both helpers are defined outside this class).
    """

    def __init__(self, size):
        self.size = size

    def __call__(self, image):
        h, w, _ = image.shape
        new_w, new_h = scale_down((w, h), self.size)

        # When the window already spans a full dimension the offset is fixed
        # at 0 and no random number is drawn for that axis.
        # NOTE(review): if ``random`` is numpy.random the upper bound is
        # exclusive, so the largest possible offset is never sampled --
        # confirm whether that is intended.
        x0 = 0 if w == new_w else random.randint(0, w - new_w)
        y0 = 0 if h == new_h else random.randint(0, h - new_h)

        return fixed_crop(image, x0, y0, new_w, new_h, self.size)



class RandomResizedCrop(object):
    """Crop a random window of random area / aspect ratio, then resize.

    :param size: output size, passed to ``cv2.resize``, ``fixed_crop`` and
        ``center_crop``.
    :param scale: ``(min, max)`` fraction of the image area to crop.
    :param ratio: ``(min, max)`` aspect ratio of the crop.

    About 20% of the calls skip cropping and only resize the image. If ten
    attempts fail to produce a window strictly smaller than the image, the
    result falls back to ``center_crop``.
    """

    def __init__(self, size, scale=(0.49, 1.0), ratio=(1., 1.)):
        self.size = size
        self.scale = scale
        self.ratio = ratio

    def __call__(self, img):
        if random.random() < 0.2:
            return cv2.resize(img, self.size)

        h, w, _ = img.shape
        area = h * w
        for _ in range(10):
            target_area = random.uniform(self.scale[0], self.scale[1]) * area
            aspect_ratio = random.uniform(self.ratio[0], self.ratio[1])

            new_w = int(round(math.sqrt(target_area * aspect_ratio)))
            new_h = int(round(math.sqrt(target_area / aspect_ratio)))

            # Half of the time the two sides trade places.
            if random.random() < 0.5:
                new_w, new_h = new_h, new_w

            if not (new_w < w and new_h < h):
                continue

            x0 = random.randint(0, w - new_w)
            y0 = random.randint(0, h - new_h)
            return fixed_crop(img, x0, y0, new_w, new_h, self.size)

        # Fallback
        return center_crop(img, self.size)


class DownCrop():
    """Deterministic square crop anchored to the bottom edge of the image.

    :param size: output size, forwarded to ``fixed_crop`` / ``center_crop``.
    :param select: collection of attribute indices this transform applies to;
        any other ``attr_idx`` passes the image through unchanged.
    :param scale: ``(min, max)`` area fractions; their mean decides the crop
        area.

    ``attr_idx == 0`` uses the range ``(0.64, 1.0)`` for that call only.
    Previously this override was written to ``self.scale`` and therefore
    leaked into every later call, making results depend on call order.
    """

    def __init__(self, size, select, scale=(0.36, 0.81)):
        self.size = size
        self.scale = scale
        self.select = select

    def __call__(self, img, attr_idx):
        if attr_idx not in self.select:
            return img, attr_idx

        # Per-call override; the configured scale stays untouched.
        scale = (0.64, 1.0) if attr_idx == 0 else self.scale

        h, w, _ = img.shape
        area = h * w
        target_area = (scale[0] + scale[1]) / 2.0 * area

        # Square window: both sides come from the same area.
        new_w = int(round(math.sqrt(target_area)))
        new_h = int(round(math.sqrt(target_area)))

        if new_w < w and new_h < h:
            x0 = int(0.5 * (w - new_w))   # horizontally centred
            y0 = h - new_h                # flush with the bottom edge
            out = fixed_crop(img, x0, y0, new_w, new_h, self.size)
            return out, attr_idx

        # Fallback
        return center_crop(img, self.size), attr_idx


class ResizedCrop(object):
    """Deterministic square crop, horizontally centred, then resized.

    :param size: output size, forwarded to ``fixed_crop`` / ``center_crop``.
    :param select: collection of attribute indices this transform applies to;
        any other ``attr_idx`` passes the image through unchanged.
    :param scale: ``(min, max)`` area fractions; their mean decides the crop
        area.
    :param ratio: stored but not used by the current implementation.

    Per-attribute overrides (applied to the current call only):
    ``attr_idx == 2`` uses ``(0.36, 0.81)`` and halves the vertical offset;
    ``attr_idx == 0`` uses ``(0.81, 1.0)``. Previously these overrides were
    written to ``self.scale`` and leaked into every later call.
    """

    def __init__(self, size, select, scale=(0.64, 1.0), ratio=(3. / 4., 4. / 3.)):
        self.size = size
        self.scale = scale
        self.ratio = ratio
        self.select = select

    def __call__(self, img, attr_idx):
        if attr_idx not in self.select:
            return img, attr_idx
        h, w, _ = img.shape
        area = h * w

        # Local copies so the configured scale is never overwritten.
        scale = self.scale
        d = 1
        if attr_idx == 2:
            scale = (0.36, 0.81)
            d = 2
        elif attr_idx == 0:
            scale = (0.81, 1.0)

        target_area = (scale[0] + scale[1]) / 2.0 * area

        # Square window: both sides come from the same area.
        new_w = int(round(math.sqrt(target_area)))
        new_h = int(round(math.sqrt(target_area)))

        if new_w < w and new_h < h:
            x0 = (w - new_w) // 2
            # ``d`` shrinks the vertical offset, moving the window upwards.
            y0 = (h - new_h) // d // 2
            out = fixed_crop(img, x0, y0, new_w, new_h, self.size)
            return out, attr_idx

        # Fallback
        return center_crop(img, self.size), attr_idx
class RandomTwelveCrop(object):
    """Build twelve views of an image: six from it and six from its flip.

    For each of the two source images the views are, in order: the whole
    image resized to ``size``, then crops of ``size`` at the top-left,
    top-right, bottom-left, bottom-right and centre (each produced by
    ``fixed_crop``, a helper defined outside this class).

    Despite the name, nothing here is random.

    :param size: ``(width, height)`` of every produced view.
    """

    def __init__(self, size):
        self.size = size

    def __call__(self, image):
        # cv2.flip with flipCode 0, exactly as in the original code.
        flipimage = cv2.flip(image, 0)
        # The second group used to be cropped from ``image`` again, so views
        # 8-12 merely duplicated views 2-6; they now come from the flip.
        return self._six_views(image) + self._six_views(flipimage)

    def _six_views(self, image):
        """Return [resized, top-left, top-right, bottom-left, bottom-right, centre]."""
        sh, sw, _ = image.shape
        new_w, new_h = self.size

        right = int(sw - new_w)
        bottom = int(sh - new_h)
        offsets = [
            (0, 0),
            (right, 0),
            (0, bottom),
            (right, bottom),
            (int((sw - new_w) / 2), int((sh - new_h) / 2)),
        ]

        views = [cv2.resize(image, self.size)]
        views.extend(
            fixed_crop(image, x0, y0, new_w, new_h, self.size)
            for x0, y0 in offsets
        )
        return views
class RandomHflip(object):
    """Flip an image with ``cv2.flip(image, 1)`` about half of the time."""

    def __call__(self, image):
        # One-argument randint yields 0 or 1 (numpy.random semantics).
        return cv2.flip(image, 1) if random.randint(2) else image


class Hflip(object):
    """Flip an image with ``cv2.flip(image, 1)`` when enabled.

    :param doHflip: truthy to flip every image, falsy to pass images through.
    """

    def __init__(self, doHflip):
        self.doHflip = doHflip

    def __call__(self, image):
        if not self.doHflip:
            return image
        return cv2.flip(image, 1)


class CenterCrop(object):
    """Callable wrapper around ``center_crop`` with a stored target size.

    :param size: forwarded unchanged to ``center_crop`` on every call
        (``center_crop`` is defined outside this class).
    """

    def __init__(self, size):
        self.size = size

    def __call__(self, image):
        target = self.size
        return center_crop(image, target)

class UpperCrop():
    """Deterministic square crop anchored to the top edge of the image.

    :param size: output size, forwarded to ``fixed_crop`` / ``center_crop``.
    :param scale: ``(min, max)`` area fractions; their mean decides the crop
        area.
    """

    def __init__(self, size, scale=(0.09, 0.64)):
        self.size = size
        self.scale = scale

    def __call__(self, img):
        h, w, _ = img.shape
        fraction = (self.scale[0] + self.scale[1]) / 2.0
        target_area = fraction * (h * w)

        # The window is square, so one side length serves both dimensions.
        side = int(round(math.sqrt(target_area)))

        if not (side < w and side < h):
            # Fallback
            return center_crop(img, self.size)

        x0 = int(0.5 * (w - side))   # horizontally centred
        y0 = 0                       # flush with the top edge
        return fixed_crop(img, x0, y0, side, side, self.size)



class RandomUpperCrop(object):
    """Random crop biased towards the top of the image.

    :param size: output size, forwarded to ``fixed_crop`` / ``center_crop``.
    :param select: collection of attribute indices this transform applies to.
    :param scale: ``(min, max)`` fraction of the image area to crop.
    :param ratio: ``(min, max)`` aspect ratio of the crop.

    About 20% of calls, and every call whose ``attr_idx`` is not selected,
    return the input unchanged. Otherwise up to ten windows are sampled;
    the first one strictly smaller than the image is cropped, and if none
    fits the result falls back to ``center_crop``.
    """

    def __init__(self, size, select, scale=(0.09, 0.64), ratio=(3. / 4., 4. / 3.)):
        self.size = size
        self.scale = scale
        self.ratio = ratio
        self.select = select

    def __call__(self, img, attr_idx):
        # The random draw happens first, before the selection test.
        if random.random() < 0.2 or attr_idx not in self.select:
            return img, attr_idx

        h, w, _ = img.shape
        area = h * w
        lo, hi = self.scale[0], self.scale[1]
        for _ in range(10):
            s = random.uniform(lo, hi)
            # Horizontal jitter half-width grows linearly from 0.1 to 0.3 as
            # the sampled area fraction goes from ``lo`` to ``hi``.
            d = 0.1 + (0.3 - 0.1) / (hi - lo) * (s - lo)
            target_area = s * area
            aspect_ratio = random.uniform(self.ratio[0], self.ratio[1])
            new_w = int(round(math.sqrt(target_area * aspect_ratio)))
            new_h = int(round(math.sqrt(target_area / aspect_ratio)))

            if not (new_w < w and new_h < h):
                continue

            dw = w - new_w
            x0 = random.randint(int((0.5 - d) * dw), int((0.5 + d) * dw) + 1)
            # Dividing by ten keeps the window in the upper part of the image.
            y0 = random.randint(0, h - new_h) // 10
            return fixed_crop(img, x0, y0, new_w, new_h, self.size), attr_idx

        # Fallback
        return center_crop(img, self.size), attr_idx
class RandomDownCrop(object):
    """Random crop biased towards the bottom of the image.

    :param size: output size, forwarded to ``fixed_crop`` / ``center_crop``.
    :param select: collection of attribute indices this transform applies to.
    :param scale: ``(min, max)`` fraction of the image area to crop.
    :param ratio: ``(min, max)`` aspect ratio of the crop.

    About 20% of calls, and every call whose ``attr_idx`` is not selected,
    return the input unchanged. ``attr_idx == 0`` samples the area from
    ``(0.64, 1.0)`` for that call only; previously this override was written
    to ``self.scale`` and leaked into every later call.
    """

    def __init__(self, size, select, scale=(0.36, 0.81), ratio=(3. / 4., 4. / 3.)):
        self.size = size
        self.scale = scale
        self.ratio = ratio
        self.select = select

    def __call__(self, img, attr_idx):
        if random.random() < 0.2:
            return img, attr_idx
        if attr_idx not in self.select:
            return img, attr_idx

        # Per-call override; the configured scale stays untouched.
        scale = (0.64, 1.0) if attr_idx == 0 else self.scale

        h, w, _ = img.shape
        area = h * w
        for _ in range(10):
            s = random.uniform(scale[0], scale[1])
            # Horizontal jitter half-width grows linearly from 0.1 to 0.3 as
            # the sampled area fraction moves across the scale range.
            d = 0.1 + (0.3 - 0.1) / (scale[1] - scale[0]) * (s - scale[0])
            target_area = s * area
            aspect_ratio = random.uniform(self.ratio[0], self.ratio[1])
            new_w = int(round(math.sqrt(target_area * aspect_ratio)))
            new_h = int(round(math.sqrt(target_area / aspect_ratio)))

            if new_w < w and new_h < h:
                dw = w - new_w
                x0 = random.randint(int((0.5 - d) * dw), int((0.5 + d) * dw) + 1)
                # Vertical offset is drawn from the last tenth of the
                # available range, keeping the window near the bottom.
                y0 = random.randint((h - new_h) * 9 // 10, h - new_h)
                out = fixed_crop(img, x0, y0, new_w, new_h, self.size)
                return out, attr_idx

        # Fallback
        return center_crop(img, self.size), attr_idx

class RandomHShift(object):
    """Randomly shift an image horizontally with wrap-around.

    :param select: collection of attribute indices this transform applies to;
        any other ``attr_idx`` passes the image through unchanged.
    :param scale: ``(min, max)`` shift expressed as a fraction of the width.

    When a shift is applied, the columns cut from one side are re-attached
    on the other side, so the output has the same shape as the input.
    """

    def __init__(self, select, scale=(0.0, 0.2)):
        self.scale = scale
        self.select = select

    def __call__(self, img, attr_idx):
        if attr_idx not in self.select:
            return img, attr_idx

        # randint(0, 2) is used as a coin flip (0 or 1 with numpy.random).
        if not random.randint(0, 2):
            return img, attr_idx

        _, w, _ = img.shape
        shift_idx = random.randint(int(w * self.scale[0]), int(w * self.scale[1]))
        direction = random.randint(0, 2)

        # A negative split point counts from the right edge, which moves the
        # trailing columns to the front; a positive one moves the leading
        # columns to the back.
        split = -shift_idx if direction else shift_idx
        head = img[:, :split, :]
        tail = img[:, split:, :]
        return np.concatenate((tail, head), axis=1), attr_idx


class RandomBottomCrop(object):
    """Random square crop taken from the lower part of the image.

    :param size: output size, forwarded to ``fixed_crop`` / ``bottom_crop``.
    :param select: collection of attribute indices this transform applies to;
        any other ``attr_idx`` passes the image through unchanged.
    :param scale: ``(min, max)`` fraction of the image area to crop.

    Up to ten windows are sampled; the first one strictly smaller than the
    image is cropped, otherwise the result falls back to ``bottom_crop``.
    """

    def __init__(self, size, select, scale=(0.4, 0.8)):
        self.size = size
        self.scale = scale
        self.select = select

    def __call__(self, img, attr_idx):
        if attr_idx not in self.select:
            return img, attr_idx

        h, w, _ = img.shape
        area = h * w
        lo, hi = self.scale[0], self.scale[1]
        for _ in range(10):
            s = random.uniform(lo, hi)
            # Horizontal jitter half-width grows linearly from 0.25 to 0.45
            # as the sampled area fraction goes from ``lo`` to ``hi``.
            d = 0.25 + (0.45 - 0.25) / (hi - lo) * (s - lo)

            # Square window: one side length serves both dimensions.
            side = int(round(math.sqrt(s * area)))
            if not (side < w and side < h):
                continue

            dw = w - side
            dh = h - side
            x0 = random.randint(int((0.5 - d) * dw), min(int((0.5 + d) * dw) + 1, dw))
            # Vertical offset starts at roughly 80% of the available range.
            y0 = random.randint(max(0, int(0.8 * dh) - 1), dh)
            return fixed_crop(img, x0, y0, side, side, self.size), attr_idx

        # Fallback
        return bottom_crop(img, self.size), attr_idx


class BottomCrop():
    """Deterministic square crop taken from the lower part of the image.

    :param size: output size, forwarded to ``fixed_crop`` / ``bottom_crop``.
    :param select: collection of attribute indices this transform applies to;
        any other ``attr_idx`` passes the image through unchanged.
    :param scale: ``(min, max)`` area fractions; two thirds of their sum is
        used as the crop area fraction.
    """

    def __init__(self, size, select, scale=(0.4, 0.8)):
        self.size = size
        self.scale = scale
        self.select = select

    def __call__(self, img, attr_idx):
        if attr_idx not in self.select:
            return img, attr_idx

        h, w, _ = img.shape
        fraction = (self.scale[0] + self.scale[1]) / 3. * 2.
        target_area = fraction * (h * w)

        # Square window: one side length serves both dimensions.
        side = int(round(math.sqrt(target_area)))

        if not (side < w and side < h):
            # Fallback
            return bottom_crop(img, self.size), attr_idx

        x0 = int(0.5 * (w - side))   # horizontally centred
        y0 = int(0.9 * (h - side))   # 90% of the way down the free range
        return fixed_crop(img, x0, y0, side, side, self.size), attr_idx



class Resize(object):
    """Resize an image to a fixed ``(width, height)`` with ``cv2.resize``.

    :param size: sequence whose first element is the target width and whose
        second element, when present, is the target height. A one-element
        sequence produces a square output.
    :param inter: OpenCV interpolation flag.

    Previously the target was always ``(size[0], size[0])``, silently
    ignoring ``size[1]``. Square sizes behave exactly as before.
    """

    def __init__(self, size, inter=cv2.INTER_LINEAR):
        self.size = size
        self.inter = inter

    def __call__(self, image):
        width = self.size[0]
        # Fall back to a square target when only one dimension was given.
        height = self.size[1] if len(self.size) > 1 else width
        return cv2.resize(image, (width, height), interpolation=self.inter)

class Lighter(object):
    """Randomly apply a linear gain / offset to an image.

    :param alpha: pair of integer bounds for the gain in tenths; the drawn
        integer is divided by 10.
    :param beta: pair of integer bounds for the additive offset.

    Note on naming: the ``*_upper`` attributes hold the FIRST element of each
    pair and the ``*_lower`` attributes the SECOND one; they are passed to
    ``random.randint`` in that order.

    When applied, the result is computed from a float32 copy of the image
    and is not clipped; otherwise the input is returned as is.
    """

    def __init__(self, alpha=(8, 12), beta=(-10, 10)):
        self.alpha_upper, self.alpha_lower = alpha[0], alpha[1]
        self.beta_upper, self.beta_lower = beta[0], beta[1]

    def __call__(self, image):
        if random.random() <= 0.5:
            return image

        pixels = np.float32(image)
        gain = random.randint(self.alpha_upper, self.alpha_lower) / 10
        offset = random.randint(self.beta_upper, self.beta_lower)
        return gain * pixels + offset



class ExpandBorder(object):
    """Pad the shorter side of an image so that it becomes square.

    :param mode: ``np.pad`` mode; for ``'constant'`` the fill value is
        ``value``.
    :param value: constant fill value used when ``mode == 'constant'``.
    :param size: target size used only when ``resize`` is true; only
        ``size[0]`` is read, for both output dimensions.
    :param resize: when truthy, the (padded) image is resized afterwards.
    """

    def __init__(self, mode='constant', value=255, size=(336, 336), resize=False):
        self.mode = mode
        self.value = value
        self.resize = resize
        self.size = size

    def __call__(self, image):
        h, w, _ = image.shape

        if h != w:
            gap = abs(h - w)
            before = gap // 2
            after = gap - before   # receives the extra pixel for odd gaps
            if h > w:
                # Taller than wide: grow the width (axis 1).
                widths = ((0, 0), (before, after), (0, 0))
            else:
                # Wider than tall: grow the height (axis 0).
                widths = ((before, after), (0, 0), (0, 0))

            # ``constant_values`` is only passed in 'constant' mode.
            extra = {'constant_values': self.value} if self.mode == 'constant' else {}
            image = np.pad(image, widths, self.mode, **extra)

        if self.resize:
            image = cv2.resize(image, (self.size[0], self.size[0]), interpolation=cv2.INTER_LINEAR)
        return image

class AstypeToInt():
    """Clip an image to [0, 255] and cast it to ``uint8``; ``attr_idx`` passes through."""

    def __call__(self, image, attr_idx):
        clipped = image.clip(0, 255.0)
        return clipped.astype(np.uint8), attr_idx

class AstypeToFloat():
    """Cast an image to ``float32``; ``attr_idx`` passes through unchanged."""

    def __call__(self, image, attr_idx):
        as_float = image.astype(np.float32)
        return as_float, attr_idx

import matplotlib.pyplot as plt
class Normalize(object):
    """Move the channel axis to the front: (H, W, 3) -> (3, H, W).

    ``mean`` and ``std`` are stored, but the current ``__call__`` does not
    use them: it only transposes the axes and applies no scaling or
    mean/std normalisation.
    """

    def __init__(self, mean, std):
        '''
        :param mean: RGB order
        :param std:  RGB order
        '''
        # Shaped (3, 1, 1) so they would broadcast against a (3, H, W) image.
        self.mean = np.array(mean).reshape((3, 1, 1))
        self.std = np.array(std).reshape((3, 1, 1))

    def __call__(self, image):
        '''
        :param image:  (H,W,3)  RGB
        :return: the image with axes reordered to (3, H, W)
        '''
        channels_first = image.transpose(2, 0, 1)
        return channels_first

class Normalize_test(object):
    """Normalise every image of a list in place.

    Each (H, W, 3) image is transposed to (3, H, W), divided by 255, shifted
    by ``mean`` and divided by ``std``. The list passed in is updated element
    by element and returned.
    """

    def __init__(self, mean, std):
        '''
        :param mean: RGB order
        :param std:  RGB order
        '''
        # Shaped (3, 1, 1) to broadcast against channel-first images.
        self.mean = np.array(mean).reshape((3, 1, 1))
        self.std = np.array(std).reshape((3, 1, 1))

    def __call__(self, image_list):
        '''
        :param image_list: list of (H,W,3) RGB images
        :return: the same list, holding normalised (3,H,W) arrays
        '''
        for idx, image in enumerate(image_list):
            channels_first = image.transpose((2, 0, 1))
            image_list[idx] = (channels_first / 255. - self.mean) / self.std
        return image_list

class RandomErasing(object):
    """Overwrite a random rectangle of a channel-first image with a fill value.

    :param select: collection of attribute indices this transform applies to;
        any other ``attr_idx`` passes the image through unchanged.
    :param EPSILON: the transform is skipped when a uniform draw from [0, 1)
        exceeds this value.
    :param sl: minimum erased area as a fraction of the image area.
    :param sh: maximum erased area as a fraction of the image area.
    :param r1: lower bound of the rectangle aspect ratio; the upper bound is
        ``1 / r1``.
    :param mean: per-channel fill values. The default is an immutable tuple
        so that instances cannot share a mutable default.

    The image is indexed as ``img[channel, row, col]`` and modified in place.
    Up to 100 rectangles are sampled; the first one that fits is erased.
    """

    def __init__(self, select, EPSILON=0.5, sl=0.02, sh=0.09, r1=0.3,
                 mean=(0.485, 0.456, 0.406)):
        self.EPSILON = EPSILON
        self.mean = mean
        self.sl = sl
        self.sh = sh
        self.r1 = r1
        self.select = select

    def __call__(self, img, attr_idx):
        if attr_idx not in self.select:
            return img, attr_idx

        if random.uniform(0, 1) > self.EPSILON:
            return img, attr_idx

        channels, height, width = img.shape[0], img.shape[1], img.shape[2]
        # The image area does not change between attempts.
        area = height * width

        for _ in range(100):
            target_area = random.uniform(self.sl, self.sh) * area
            aspect_ratio = random.uniform(self.r1, 1 / self.r1)

            h = int(round(math.sqrt(target_area * aspect_ratio)))
            w = int(round(math.sqrt(target_area / aspect_ratio)))

            if w <= width and h <= height:
                # ``random`` is assumed to be numpy.random (the file calls
                # random.randint(2) elsewhere), whose upper bound is
                # exclusive. The "+ 1" makes the last valid offset reachable
                # and avoids randint(0, 0), which raises when the rectangle
                # spans a full dimension.
                top = random.randint(0, height - h + 1)
                left = random.randint(0, width - w + 1)
                if channels == 3:
                    for c in range(3):
                        img[c, top:top + h, left:left + w] = self.mean[c]
                else:
                    # NOTE(review): non-3-channel input is filled with
                    # mean[1]; confirm that index is intended.
                    img[0, top:top + h, left:left + w] = self.mean[1]
                return img, attr_idx

        return img, attr_idx


# Disabled demo, kept as a bare string literal so it never executes.
# NOTE(review): it no longer matches the classes in this file -- it passes
# ``select=`` to ExpandBorder, whose constructor takes no such parameter, and
# it relies on a ``Compose`` helper that is not visible in this part of the
# file. Treat it as historical reference only.
'''
if __name__ == '__main__':

    import matplotlib.pyplot as plt


    class FSAug(object):
        def __init__(self):
            self.augment = Compose([
                AstypeToFloat(),
                # RandomHShift(scale=(0.,0.2),select=range(8)),
                # RandomRotate(angles=(-20., 20.), bound=True),
                ExpandBorder(select=range(8), mode='symmetric'),# symmetric
                # Resize(size=(336, 336), select=[ 2, 7]),
                AstypeToInt()
            ])

        def __call__(self, spct,attr_idx):
            return self.augment(spct,attr_idx)


    trans = FSAug()

    img_path = '/media/gserver/data/FashionAI/round2/train/Images/coat_length_labels/0b6b4a2146fc8616a19fcf2026d61d50.jpg'
    img = cv2.cvtColor(cv2.imread(img_path),cv2.COLOR_BGR2RGB)
    img_trans,_ = trans(img,5)
    # img_trans2,_ = trans(img,6)
    print (img_trans.max(), img_trans.min())
    print (img_trans.dtype)

    plt.figure()
    plt.subplot(221)
    plt.imshow(img)

    plt.subplot(222)
    plt.imshow(img_trans)

    # plt.subplot(223)
    # plt.imshow(img_trans2)
    # plt.imshow(img_trans2)
    plt.show()
'''
def Laplas(img):
    """Return the difference between an image and a pyramid-blurred copy.

    The image is reduced three times with ``cv2.pyrDown`` and expanded three
    times with ``cv2.pyrUp``; the input is resized to the shape of that
    result, the blurred copy is subtracted from it, and the difference is
    passed through a ``COLOR_BGR2RGB`` conversion.

    NOTE(review): the subtraction uses the array ``-`` operator, so for
    unsigned integer images negative differences wrap around instead of
    saturating -- confirm this is intended.

    :param img: three-dimensional image array.
    :return: the channel-swapped difference image.
    """
    blurred = img
    for _ in range(3):
        blurred = cv2.pyrDown(blurred)
    for _ in range(3):
        blurred = cv2.pyrUp(blurred)

    # The down/up round trip may not reproduce the input size exactly, so
    # the input is resized to match before subtracting.
    rows, cols, _ = blurred.shape
    resized = cv2.resize(img, (cols, rows))
    detail = resized - blurred
    return cv2.cvtColor(detail, cv2.COLOR_BGR2RGB)


if __name__ == '__main__':
    # Manual smoke test: run the perspective transformation on every file of
    # a local image directory and display each result with matplotlib.
    pre_path = '/data1/digital_dataset/1030_data/train_img/'
    for image_path in os.listdir(pre_path):
        print(pre_path, image_path)
        full_path = os.path.join(pre_path, image_path)
        img = cv2.imread(full_path)
        # No waitKey call follows, so this window is not given a chance to
        # refresh; the matplotlib figure below is what actually gets shown.
        cv2.imshow('test', img)
        transform = perspective_transformation()
        img = transform(img)
        plt.imshow(img)
        plt.show()
    print(666)

后续会补充一些非线性的数据增强方法……

YAYA视觉

关注

4
点赞
踩
20

收藏

觉得还不错? 一键收藏
0
评论
二、深度学习数据增强方法汇总

深度学习模型训练数据增强方法汇总：一、随机裁剪；二、RGB-->BGR通道互换；三、仿射变换（缩放）；三、随机旋转；四、对比度调整；五、随机抠图；六、bound box 中心点随机抠图；七、随机缩放。后续会补充一些非线性的数据增强方法……一、随机裁剪：class RandomResize(object): def __init__(self, cfg): self.image_short_size = 768 self.image_max_size = 768
复制链接

扫一扫

专栏目录