【python】Data Augmentation

bryant_meng

于 2024-07-04 10:01:27 发布

阅读量337

点赞数 4

分类专栏： Python 文章标签： python 开发语言

本文链接：https://blog.csdn.net/bryant_meng/article/details/140169055

版权

Python 专栏收录该内容

113 篇文章 7 订阅

订阅专栏

在这里插入图片描述

参考学习来自：使用Python+OpenCV进行数据增广方法综述（附代码演练）

文章目录

Random Cut
Cutout
Color Jitter
Add Noisy

Random Cut

随机裁剪出一块宽、高均为原图 scale 倍的区域，再 resize 到原图大小，bounding box 的坐标也相应地改变

引入了丢弃机制：裁剪后 bbox 保留下来的面积与原面积之比如果小于 ratio（函数默认 0.25，下方示例取 0.5，即 50%），该 bbox 会被丢弃

import random
import cv2


def randomcrop(img, gt_boxes, scale=0.5, ratio=0.25, box_colors=None):
    '''
    ### Random Crop ###
    Crop a random window whose sides are `scale` times the image sides,
    resize it back to the original image size and remap the boxes.

    img: image array indexed as img[row, col]
    gt_boxes: format [[obj x1 y1 x2 y2],...]
    scale: side length of the crop relative to the image side
    ratio: minimum fraction of a box's area that must remain inside the crop
           for the box to be kept; a falsy value (0 / None) disables the filter
    box_colors: optional list of colors for the debug drawing, cycled over the
                boxes; a built-in 4-color palette is used when omitted

    Returns (resized, new_boxes): the crop resized to the input size and the
    surviving boxes as [obj, x1, y1, x2, y2] in resized-image coordinates.

    Side effect: writes "bbox.jpg" showing the crop window and the input boxes.

    Raises ValueError when the crop window is empty (scale too small) or does
    not fit in the image (scale > 1, raised by random.randint).
    '''
    img_h, img_w = img.shape[0], img.shape[1]

    # Crop image
    height, width = int(img_h * scale), int(img_w * scale)
    if height < 1 or width < 1:
        raise ValueError("scale=%r gives an empty crop for a %dx%d image"
                         % (scale, img_w, img_h))
    # Top Left Coordinate
    x = random.randint(0, img_w - width)
    y = random.randint(0, img_h - height)
    cropped = img[y:y+height, x:x+width]

    # Visualize the crop window and the original boxes
    if not box_colors:
        box_colors = [(0, 0, 255), (0, 255, 0), (255, 0, 0), (0, 255, 255)]
    img_copy = img.copy()
    cv2.rectangle(img_copy, (x, y), (x+width, y+height), color=[255, 0, 255], thickness=10)
    for i, box in enumerate(gt_boxes):
        # Cycle through the palette so any number of boxes can be drawn.
        cv2.rectangle(img_copy, (int(box[1]), int(box[2])), (int(box[3]), int(box[4])),
                      color=box_colors[i % len(box_colors)], thickness=5)
    cv2.imwrite("bbox.jpg", img_copy)

    resized = cv2.resize(cropped, (img_w, img_h))

    # Magnification actually applied by the resize. It differs slightly from
    # 1/scale whenever int() truncated the crop size above.
    fx, fy = img_w / width, img_h / height

    # Modify annotation
    new_boxes = []
    for box in gt_boxes:
        obj_name = box[0]
        x1, y1, x2, y2 = int(box[1]), int(box[2]), int(box[3]), int(box[4])

        if ratio:  # drop boxes that lost too much of their area to the crop
            ori_area = (y2 - y1) * (x2 - x1)
            if ori_area <= 0:
                # Degenerate box: nothing to keep, and dividing by its area
                # would fail.
                continue
            # Clamp each side at 0 so a box lying completely outside the
            # window cannot yield a positive area from two negative sides.
            inter_w = max(0, min(x2, x + width) - max(x1, x))
            inter_h = max(0, min(y2, y + height) - max(y1, y))
            if (inter_w * inter_h) / ori_area < ratio:
                continue

        # Shift into crop coordinates, then scale to the resized image.
        x1, x2 = (x1 - x) * fx, (x2 - x) * fx
        y1, y2 = (y1 - y) * fy, (y2 - y) * fy

        # Keep only boxes that still overlap the image, clipped to its borders.
        if x1 < img_w and y1 < img_h and x2 > 0 and y2 > 0:
            new_boxes.append([obj_name,
                              max(x1, 0), max(y1, 0),
                              min(x2, img_w), min(y2, img_h)])
    return resized, new_boxes


if __name__ == "__main__":
    # Demo: random-crop ./1.jpg and draw the surviving boxes on the result.

    # Colors used when drawing the boxes.
    colors = [[0, 0, 255],
              [0, 255, 0],
              [255, 0, 0],
              [0, 255, 255]]

    bbox = [["person", 777, 221, 1361, 769],
            ["cat", 1085, 413, 1337, 617],
            ['bird', 413, 273, 625, 581],
            ['bag', 877, 781, 1013, 909]]

    img = cv2.imread("./1.jpg")
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # instead of raising, so fail here with a clear message.
        raise FileNotFoundError("Cannot read image: ./1.jpg")

    resized, new_bbox = randomcrop(img, bbox, scale=0.5, ratio=0.5)
    print(new_bbox)
    # Example output (varies with the random crop position):
    # [['person', 1162.0, 164.0, 1920, 1080], ['bird', 434.0, 268.0, 858.0, 884.0]]

    # The returned coordinates may be floats; cv2.rectangle needs ints.
    for color, box in zip(colors, new_bbox):
        cv2.rectangle(resized, (int(box[1]), int(box[2])),
                      (int(box[3]), int(box[4])),
                      color=color, thickness=5)
    cv2.imwrite("resized.jpg", resized)

    # Optional interactive preview:
    # cv2.imshow("new bbox", resized)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()

输入图片
在这里插入图片描述

案例1

bbox.jpg，粉色框是随机裁剪的区域，其他框是目标

在这里插入图片描述
resized.jpg，粉色区域也被缩放成原图大小，可以看到猫的框裁剪过半，被舍弃了

在这里插入图片描述

案例2

在这里插入图片描述
未出现裁剪过半的矩形框，所有目标都被保留

在这里插入图片描述

案例3

人和海鸥裁剪均过半，被舍弃，猫和包裁剪未过半，得以保留

在这里插入图片描述
resize 到原图大小

在这里插入图片描述

Cutout

import random
import cv2

def cutout(img, gt_boxes, amount=0.5):
    '''
    ### Cutout ###
    Black out a random patch inside some of the boxes, on a copy of the image.

    img: image (left untouched; the masks are drawn on a copy)
    gt_boxes: format [[obj x1 y1 x2 y2],...]
    amount: num of masks / num of objects

    Each selected box gets one filled black rectangle whose width and height
    are half of the box's, placed at a random position inside the box.
    Returns the masked copy.
    '''
    out = img.copy()
    n_masks = round(amount * len(gt_boxes))

    for picked in random.sample(gt_boxes, n_masks):
        left, top, right, bottom = map(int, (picked[1], picked[2], picked[3], picked[4]))
        patch_w = int((right - left) * 0.5)
        patch_h = int((bottom - top) * 0.5)
        # Draw the top-left corner so that the patch stays inside the box.
        px = random.randint(left, right - patch_w)
        py = random.randint(top, bottom - patch_h)
        cv2.rectangle(out, (px, py), (px + patch_w, py + patch_h), (0, 0, 0), thickness=-1)
    return out


if __name__ == "__main__":
    # Demo: apply cutout to ./1.jpg, then outline every box on the result.

    # Colors used when drawing the boxes.
    colors = [[0, 0, 255],
              [0, 255, 0],
              [255, 0, 0],
              [0, 255, 255]]

    bbox = [["person", 777, 221, 1361, 769],
            ["cat", 1085, 413, 1337, 617],
            ['bird', 413, 273, 625, 581],
            ['bag', 877, 781, 1013, 909]]

    img = cv2.imread("./1.jpg")
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # instead of raising, so fail here with a clear message.
        raise FileNotFoundError("Cannot read image: ./1.jpg")

    out = cutout(img, bbox)
    for color, box in zip(colors, bbox):
        cv2.rectangle(out, (box[1], box[2]), (box[3], box[4]),
                      color=color, thickness=5)
    cv2.imwrite("cutout.jpg", out)

在目标框内进行 mask：mask 的左上角位置随机，宽和高固定为目标框宽、高的 1/2

输入图片

在这里插入图片描述

输出结果

在这里插入图片描述
可以观察到，人形框和小猫框被 mask 掉了 1/4 区域

Color Jitter

import random
import cv2
import numpy as np

# Offsets the brightness / saturation jitter picks from.
_JITTER_OFFSETS = (-50, -40, -30, 30, 40, 50)

# Index of the channel each jitter type modifies in the HSV image.
_HSV_CHANNEL = {"s": 1, "b": 2}


def _shift_hsv_channel(img, channel, value):
    '''Add `value` to one HSV channel of a BGR image, saturating at 0 and 255.'''
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # Widen before adding so the sum cannot wrap around in uint8, then clamp.
    shifted = hsv[..., channel].astype(np.int16) + value
    hsv[..., channel] = np.clip(shifted, 0, 255).astype(np.uint8)
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)


def colorjitter(img, cj_type="b"):
    '''
    ### Different Color Jitter ###
    img: image (BGR; assumed 8-bit, the code clamps to 0..255)
    cj_type: {b: brightness, s: saturation, c: contrast}

    "b" / "s" add one random offset from {-50, -40, -30, 30, 40, 50} to the
    V / S channel in HSV space, saturating at 0 and 255.
    "c" applies img * (contrast/127 + 1) - contrast + 10 with a random
    contrast in [40, 100], clips to 0..255 and returns uint8.

    Returns a new image; the input is not modified.
    Raises ValueError for an unknown cj_type.
    '''
    if cj_type in _HSV_CHANNEL:
        # int(): keep the offset a plain Python int so that array arithmetic
        # does not depend on how NumPy promotes an np.int64 scalar.
        value = int(np.random.choice(np.array(_JITTER_OFFSETS)))
        return _shift_hsv_channel(img, _HSV_CHANNEL[cj_type], value)

    if cj_type == "c":
        brightness = 10
        contrast = random.randint(40, 100)
        # Work in a wider signed type so intermediate values can go below 0
        # or above 255 before clipping.
        dummy = img.astype(np.int16) * (contrast/127+1) - contrast + brightness
        return np.clip(dummy, 0, 255).astype(np.uint8)

    raise ValueError("unknown cj_type %r, expected 'b', 's' or 'c'" % (cj_type,))


if __name__ == "__main__":
    # Demo: write brightness / saturation / contrast jittered copies of ./1.jpg.
    img = cv2.imread("./1.jpg")
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # instead of raising, so fail here with a clear message.
        raise FileNotFoundError("Cannot read image: ./1.jpg")

    img1 = colorjitter(img, "b")
    img2 = colorjitter(img, "s")
    img3 = colorjitter(img, "c")
    cv2.imwrite("b.jpg", img1)
    cv2.imwrite("s.jpg", img2)
    cv2.imwrite("c.jpg", img3)

输入图片

在这里插入图片描述

输出结果

b.jpg 亮度增广
在这里插入图片描述

c.jpg 对比度增广
在这里插入图片描述
s.jpg 饱和度增广

在这里插入图片描述

Add Noisy

import cv2
import numpy as np


def noisy(img, noise_type="gauss", mean=0, sigma=0.7, prob=0.05):
    '''
    ### Adding Noise ###
    img: image (assumed 8-bit; results are clamped to 0..255)
    noise_type: {gauss: gaussian, sp: salt & pepper}
    mean, sigma: mean and standard deviation of the gaussian noise
    prob: fraction of pixels replaced by salt & pepper noise
          (half set to black, half set to white)

    Returns a new uint8 image; the input is not modified.
    Raises ValueError for an unknown noise_type.
    '''
    if noise_type == "gauss":
        gauss = np.random.normal(mean, sigma, img.shape)
        # Add in floating point and clamp afterwards. Casting the noise to
        # uint8 first would turn negative samples into values near 255 and
        # push those pixels to white.
        noisy_img = np.rint(img.astype(np.float64) + gauss)
        return np.clip(noisy_img, 0, 255).astype(np.uint8)

    if noise_type == "sp":
        image = img.copy()
        if image.ndim == 2:
            black = 0
            white = 255
        else:
            channels = image.shape[2]
            black = np.zeros(channels, dtype='uint8')
            white = np.full(channels, 255, dtype='uint8')
            if channels == 4:
                # 4-channel image: keep the last channel (alpha) at 255 for
                # pepper pixels too.
                black[3] = 255
        # One random draw per pixel; the two tails of the uniform
        # distribution select the pepper and the salt pixels.
        probs = np.random.random(image.shape[:2])
        image[probs < (prob / 2)] = black
        image[probs > 1 - (prob / 2)] = white
        return image

    raise ValueError("unknown noise_type %r, expected 'gauss' or 'sp'" % (noise_type,))


if __name__ == "__main__":
    # Demo: write gaussian and salt & pepper noisy copies of ./1.jpg.
    img = cv2.imread("./1.jpg")
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # instead of raising, so fail here with a clear message.
        raise FileNotFoundError("Cannot read image: ./1.jpg")

    img1 = noisy(img, "gauss")
    img2 = noisy(img, "sp")
    cv2.imwrite("gauss.jpg", img1)
    cv2.imwrite("sp.jpg", img2)