计算机视觉技巧合集（四）数据增强之目标检测篇2

最新推荐文章于 2024-04-12 15:04:54 发布

飞天小老虎66

最新推荐文章于 2024-04-12 15:04:54 发布

阅读量249

点赞数 1

分类专栏：计算机视觉技巧合集　文章标签：计算机视觉、目标检测、人工智能

本文链接：https://blog.csdn.net/qq_40691600/article/details/131330826

版权

计算机视觉技巧合集专栏收录该内容

8 篇文章 1 订阅

订阅专栏

5. 增加噪声

增加噪声方法较为常用的是cutout方法，该方法会随机生成一些遮盖区域覆盖掉原图像对应的区域。

cutout示例代码如下：

def bbox_ioa(box1, box2):
    """Return the intersection of ``box1`` with each ``box2`` over the ``box2`` area.

    Args:
        box1: A single box indexed as ``[x1, y1, x2, y2]``.
        box2: An array of boxes with one ``[x1, y1, x2, y2]`` row per box;
            it is transposed so that each coordinate becomes one row.

    Returns:
        The overlap area divided by the area of each ``box2`` box.
    """
    # After the transpose the first four rows are x1, y1, x2, y2 of all boxes.
    left2, top2, right2, bottom2 = box2.transpose()[:4]
    left1, top1, right1, bottom1 = box1[0], box1[1], box1[2], box1[3]

    # Overlap extent along each axis; negative values mean "no overlap".
    overlap_w = np.clip(np.minimum(right1, right2) - np.maximum(left1, left2), 0, None)
    overlap_h = np.clip(np.minimum(bottom1, bottom2) - np.maximum(top1, top2), 0, None)
    inter_area = overlap_w * overlap_h

    # The tiny constant keeps the division defined for zero-area boxes.
    box2_area = (right2 - left2) * (bottom2 - top2) + 1e-16

    return inter_area / box2_area

def cutout(image, labels):
    """Apply cutout augmentation (https://arxiv.org/abs/1708.04552).

    Random rectangles of ``image`` are overwritten in place with a random
    colour, and labels that end up mostly covered by a rectangle are dropped.

    Args:
        image: Image array indexed ``[row, col, channel]``; it is modified
            in place. Each mask is filled with a 3-value colour.
        labels: Label array; columns ``1:5`` are passed to ``bbox_ioa`` as
            boxes. Assumes they are ``x1, y1, x2, y2`` in pixels of
            ``image`` -- TODO confirm against ``load_box``.

    Returns:
        The rows of ``labels`` whose box is covered by less than 60 % by
        every mask drawn.
    """
    h, w = image.shape[:2]

    # Mask size as a fraction of the image side; smaller masks are drawn more often.
    scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16
    for s in scales:
        # int(h * s) becomes 0 on small images (e.g. a side below 32 px at
        # s=0.03125) and random.randint(1, 0) raises ValueError, so the upper
        # bound is clamped to 1. Larger images draw exactly as before.
        mask_h = random.randint(1, max(1, int(h * s)))
        mask_w = random.randint(1, max(1, int(w * s)))

        # Mask rectangle, clipped to the image.
        xmin = max(0, random.randint(0, w) - mask_w // 2)
        ymin = max(0, random.randint(0, h) - mask_h // 2)
        xmax = min(w, xmin + mask_w)
        ymax = min(h, ymin + mask_h)

        # Paint the mask with a random mid-range colour.
        image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]

        # Drop boxes that this mask hides; every scale listed above exceeds
        # 0.03, so the filter runs for each mask.
        if len(labels) and s > 0.03:
            box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
            ioa = bbox_ioa(box, labels[:, 1:5])  # fraction of each label box covered
            labels = labels[ioa < 0.60]  # keep boxes covered by less than 60 %

    return labels

if __name__ == "__main__":
    # Sample VOC2012 images and their annotation files, in matching order.
    img_paths = [
        r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_002105.jpg",
        r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000033.jpg",
        r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000042.jpg",
        r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000925.jpg",
        r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000123.jpg",
    ]
    anno_paths = [
        r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_002105.xml",
        r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000033.xml",
        r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000042.xml",
        r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000925.xml",
        r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000123.xml",
    ]
    indices = [0, 1, 2, 3, 4]

    # Load the first sample, run cutout on it (the image is edited in place)
    # and display the boxes that survive.
    img, boxes = cv2.imread(img_paths[0]), load_box(anno_paths[0])
    cut_boxes = cutout(img, boxes)
    show_img_boxes("cutout img", img, cut_boxes)

程序运行结果如下：
在这里插入图片描述
Figure1：cutout处理后的图像，图像的高和宽分别是375*500

6. 图像变形

图像变形的数据增强方法主要有平移、旋转、缩放、剪切（扭曲）以及透视等变换。

图像变形示例代码如下，参考自https://github.com/ultralytics/yolov5/blob/master/utils/augmentations.py#L144：

import math

# Note from the original author: `perspective` is normally left at 0.0.
def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
                       border=(0, 0), use_segments=True):
    """Apply a random perspective/affine warp to an image and its boxes.

    A 3x3 matrix ``M = T @ S @ R @ P @ C`` is built from randomly drawn
    parameters and applied to ``img``; the four corners of every target box
    are pushed through the same matrix and replaced by their axis-aligned
    bounding rectangle.

    Args:
        img: Input image array; ``img.shape[0]`` / ``img.shape[1]`` are used
            as height / width.
        targets: Label array whose columns 1..4 are read as x1, y1, x2, y2.
            Assumes a 2-D NumPy array with a class id in column 0 -- TODO
            confirm against ``load_box``.
        segments: Not referenced anywhere in this function body.
        degrees: Rotation angle is drawn uniformly from ``[-degrees, degrees]``.
        translate: Translation is drawn uniformly from
            ``[0.5 - translate, 0.5 + translate]`` times the output width/height.
        scale: Scale factor is drawn uniformly from ``[1 - scale, 1 + scale]``.
        shear: Shear angles (degrees) are drawn uniformly from ``[-shear, shear]``.
        perspective: Range of the two perspective coefficients; when falsy an
            affine warp is used instead of a perspective warp.
        border: ``(y, x)`` amounts added twice to the image height/width to
            obtain the output size.
        use_segments: Only selects the area threshold handed to
            ``box_candidates`` (0.01 when true, 0.10 otherwise).

    Returns:
        ``(img, targets)``: the warped image and the targets that pass
        ``box_candidates``, with columns 1..4 replaced by the transformed,
        clipped boxes.
    """

    # Output canvas height and width (input size plus the border on both sides).
    height = img.shape[0] + border[0] * 2
    width = img.shape[1] + border[1] * 2

    # Centering matrix C: moves the image centre to the origin (0, 0).
    C = np.eye(3)
    C[0, 2] = -img.shape[1] / 2  # x translation (pixels)
    C[1, 2] = -img.shape[0] / 2  # y translation (pixels)

    # Perspective matrix P: random projective coefficients.
    P = np.eye(3)
    P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
    P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)

    # Rotation-and-scale matrix R: `a` is the random angle, `s` the random scale factor.
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
    s = random.uniform(1 - scale, 1 + scale)
    # s = 2 ** random.uniform(-scale, scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

    # Shear matrix S: random shear along x and y.
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation matrix T: moves the origin back to around the centre of the output canvas.
    T = np.eye(3)
    T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)

    # Combined transform.
    M = T @ S @ R @ P @ C  # applied right to left: C first, T last (the order matters)
    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # the image actually changes
        # Perspective warp (uses the full 3x3 matrix).
        if perspective:
            img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))
        # Affine warp (uses only the first two rows of M).
        else:
            img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))

    # Transform the ground-truth box coordinates.
    n = len(targets)
    if n:
        new = np.zeros((n, 4))  # placeholder; reassigned below by np.concatenate
        xy = np.ones((n * 4, 3))  # homogeneous coordinates, four corners per box
        xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        # Apply the same transform to the box corners as to the image.
        xy = xy @ M.T
        # Divide by the homogeneous coordinate only for the perspective case.
        xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8)  # one row of 4 (x, y) corners per box

        # Axis-aligned bounding rectangle of the four transformed corners.
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # Clip the coordinates to the output canvas.
        new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
        new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)

        # Filter out unsuitable boxes; the original boxes are multiplied by the drawn scale `s` before comparison.
        i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
        targets = targets[i]
        targets[:, 1:5] = new[i]

    return img, targets


def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
    """Select the boxes that are still usable after augmentation.

    Args:
        box1: Boxes before augmentation, shaped ``(4, n)`` with rows
            x1, y1, x2, y2.
        box2: Boxes after augmentation, same layout as ``box1``.
        wh_thr: Width and height of an augmented box must both exceed this
            many pixels.
        ar_thr: The aspect ratio of an augmented box must be below this.
        area_thr: The augmented area divided by the original area must
            exceed this.
        eps: Small constant that guards the divisions against zero.

    Returns:
        A boolean mask with one entry per box; ``True`` means keep it.
    """
    width_before = box1[2] - box1[0]
    height_before = box1[3] - box1[1]
    width_after = box2[2] - box2[0]
    height_after = box2[3] - box2[1]

    # Aspect ratio expressed as "long side / short side".
    aspect = np.maximum(width_after / (height_after + eps), height_after / (width_after + eps))

    big_enough = (width_after > wh_thr) & (height_after > wh_thr)
    area_kept = width_after * height_after / (width_before * height_before + eps) > area_thr
    not_too_thin = aspect < ar_thr
    return big_enough & area_kept & not_too_thin

if __name__ == "__main__":
    # Sample VOC2012 images and their annotation files, in matching order.
    img_paths = [r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_002105.jpg", r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000033.jpg",
                 r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000042.jpg", r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000925.jpg",
                 r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000123.jpg"]
    anno_paths = [r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_002105.xml", r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000033.xml",
                 r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000042.xml", r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000925.xml",
                 r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000123.xml"]
    indices = [0, 1, 2, 3, 4]

    # The counts must be wrapped in braces; without them the f-string prints
    # the literal text "len(img_paths)" instead of the number.
    print(f"原图像路径数量：{len(img_paths)}, 原标签路径数量： {len(anno_paths)}")
    # remove_img is defined elsewhere; presumably it prunes both lists in
    # place (its return value is not used here) -- verify against its definition.
    remove_img(img_paths, anno_paths)
    print(f"处理后的图像路径数量：{len(img_paths)}, 处理后的标签路径数量： {len(anno_paths)}")

    img_size = 640

    img = cv2.imread(img_paths[0])
    boxes = load_box(anno_paths[0])

    # Warp the first sample with the default random ranges and show the result.
    perspective_img, perspective_boxes = random_perspective(img, boxes)

    show_img_boxes("perspective img", perspective_img, perspective_boxes)

程序运行结果如下：
在这里插入图片描述
Figure2：图像变形（random_perspective）处理后的图像，图像的高和宽分别是375*500

7. 拼接、合并图像

拼接、合并图像中常用的数据增强方法有mosaic和mixup这2种方法。

mosaic数据增强方法

mosaic方法会从数据集中随机选取4张图像，将其拼接在一起，从而极大地增加了样本的多样性和复杂度。

mosaic示例代码如下，参考自https://github.com/ultralytics/yolov5/blob/master/utils/dataloaders.py#L751：

def load_img(img_path, img_size):
    """Read an image and resize it so that its longer side equals ``img_size``.

    Args:
        img_path: Path of the image file, passed to ``cv2.imread``.
        img_size: Target length in pixels of the longer image side.

    Returns:
        ``(img, h, w, resized_hw)``: the (possibly resized) image, the
        original height and width, and ``img.shape[:2]`` after resizing.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not read ``img_path``.
    """
    img = cv2.imread(img_path)
    # cv2.imread signals failure by returning None rather than raising, which
    # would otherwise surface as an opaque AttributeError on ``img.shape``.
    if img is None:
        raise FileNotFoundError(f"Image not found or unreadable: {img_path}")

    h, w = img.shape[0], img.shape[1]
    r = img_size / max(h, w)  # the same ratio is applied to both sides
    if r != 1:
        img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_AREA)
    return img, h, w, img.shape[:2]

def mosaic(img_size, img_paths, anno_paths, index, indices):
    """Build a 4-image mosaic on a ``2 * img_size`` square canvas.

    The image at ``index`` plus three images sampled from ``indices`` are
    placed around a randomly drawn centre point: top-left, top-right,
    bottom-left and bottom-right, in that order.

    Args:
        img_size: Side length each source image is resized to by
            ``load_img``; the canvas side is twice this value.
        img_paths: Image file paths.
        anno_paths: Annotation file paths, indexed like ``img_paths``.
        index: Index of the first image of the mosaic.
        indices: Pool of indices from which three further images are sampled
            without replacement (it must hold at least three entries).

    Returns:
        ``(img4, label4)``: the mosaic canvas and the concatenated labels,
        with columns 1 and 3 shifted as x and columns 2 and 4 as y into
        canvas coordinates, then clipped to the canvas.
    """
    canvas_size = img_size * 2

    # Random mosaic centre (yc, xc), kept away from the canvas edges.
    mosaic_border = [-img_size // 2, -img_size // 2]
    yc, xc = (int(random.uniform(-x, 2 * img_size + x)) for x in mosaic_border)

    index4 = [index] + random.sample(indices, 3)

    label4 = []

    # Grey (114) canvas that receives the four tiles.
    img4 = np.full((canvas_size, canvas_size, 3), 114, dtype=np.uint8)
    # A separate loop name avoids shadowing the ``index`` parameter.
    for i, img_index in enumerate(index4):
        img, origin_h, _, (scale_h, _) = load_img(img_paths[img_index], img_size)
        boxes = load_box(anno_paths[img_index])

        h, w = img.shape[0], img.shape[1]
        # (x1a, y1a, x2a, y2a): destination rectangle on the canvas.
        # (x1b, y1b, x2b, y2b): source rectangle cropped from the tile.
        if i == 0:  # top-left
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h
        elif i == 1:  # top-right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, canvas_size), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom-left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(yc + h, canvas_size)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif i == 3:  # bottom-right
            x1a, y1a, x2a, y2a = xc, yc, min(w + xc, canvas_size), min(yc + h, canvas_size)
            x1b, y1b, x2b, y2b = 0, 0, min(x2a - x1a, w), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]
        # Offset of the tile's own origin within the canvas.
        padw = x1a - x1b
        padh = y1a - y1b

        # load_img scales both sides by one factor, so the height ratio also
        # applies to the x coordinates.
        ratio = scale_h / origin_h
        boxes_pad = boxes.copy()
        boxes_pad[:, 1] = boxes[:, 1] * ratio + padw
        boxes_pad[:, 2] = boxes[:, 2] * ratio + padh
        boxes_pad[:, 3] = boxes[:, 3] * ratio + padw
        boxes_pad[:, 4] = boxes[:, 4] * ratio + padh
        label4.append(boxes_pad)

    label4 = np.concatenate(label4, 0)

    # Clip to the real canvas size. The bound used to be hard-coded as
    # 640 * 2, which is only right when img_size == 640.
    np.clip(label4[:, 1:], 0, canvas_size, out=label4[:, 1:])

    return img4, label4
   
if __name__ == "__main__":
    # Sample VOC2012 images and their annotation files, in matching order.
    img_paths = [r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_002105.jpg", r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000033.jpg",
                 r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000042.jpg", r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000925.jpg",
                 r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000123.jpg"]
    anno_paths = [r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_002105.xml", r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000033.xml",
                 r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000042.xml", r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000925.xml",
                 r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000123.xml"]
    indices = [0, 1, 2, 3, 4]

    # The counts must be wrapped in braces; without them the f-string prints
    # the literal text "len(img_paths)" instead of the number.
    print(f"原图像路径数量：{len(img_paths)}, 原标签路径数量： {len(anno_paths)}")
    # remove_img is defined elsewhere; presumably it prunes both lists in
    # place (its return value is not used here) -- verify against its definition.
    remove_img(img_paths, anno_paths)
    print(f"处理后的图像路径数量：{len(img_paths)}, 处理后的标签路径数量： {len(anno_paths)}")

    img_size = 640

    # Build a mosaic around sample 1, then scale it with the external
    # `scale` helper and show both versions.
    orgin_img, orgin_boxes = mosaic(img_size, img_paths, anno_paths, 1, indices)
    scale_img, scale_boxes = scale(orgin_img, orgin_boxes, scaleFill=True)

    show_img_boxes("mosaic img", orgin_img, orgin_boxes)
    show_img_boxes("mosaic&scale img", scale_img, scale_boxes)

程序运行结果如下：

![在这里插入图片描述](https://img-blog.csdnimg.cn/eb43a9c91d9b4f079ee614c7f4ef1974.jpeg#pic_center) Figure3：mosaic处理后的图像，图像的高和宽分别是1280*1280 ![在这里插入图片描述](https://img-blog.csdnimg.cn/d2307eda4cfd43ca98e8f953c9e9de5e.jpeg#pic_center) Figure4：mosaic和缩放处理后的图像，图像的高和宽分别是640*640

mixup数据增强方法

mixup方法首先会生成一个随机透明度，然后利用该透明度分别改变2张输入图像的透明度，最后将2张图像重叠起来得到新图像。

mixup示例代码如下，参考自https://github.com/ultralytics/yolov5/blob/master/utils/augmentations.py#L289：

def mixup(img1, label1, img2, label2):
    """Blend two images with a random weight and merge their labels.

    Args:
        img1: First image array.
        label1: Labels belonging to ``img1``.
        img2: Second image array; it is combined element-wise with ``img1``.
        label2: Labels belonging to ``img2``.

    Returns:
        ``(img, labels)``: the weighted blend cast to ``uint8`` and the two
        label arrays concatenated along axis 0.
    """
    # Beta(32, 32) is symmetric around 0.5.
    alpha = np.random.beta(32.0, 32.0)
    blended = img1 * alpha + img2 * (1 - alpha)
    merged_labels = np.concatenate((label1, label2), 0)
    return blended.astype(np.uint8), merged_labels

if __name__ == "__main__":
    # Sample VOC2012 images and their annotation files, in matching order.
    img_paths = [r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_002105.jpg", r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000033.jpg",
                 r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000042.jpg", r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000925.jpg",
                 r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000123.jpg"]
    anno_paths = [r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_002105.xml", r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000033.xml",
                 r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000042.xml", r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000925.xml",
                 r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000123.xml"]
    indices = [0, 1, 2, 3, 4]

    # The counts must be wrapped in braces; without them the f-string prints
    # the literal text "len(img_paths)" instead of the number.
    print(f"原图像路径数量：{len(img_paths)}, 原标签路径数量： {len(anno_paths)}")
    # remove_img is defined elsewhere; presumably it prunes both lists in
    # place (its return value is not used here) -- verify against its definition.
    remove_img(img_paths, anno_paths)
    print(f"处理后的图像路径数量：{len(img_paths)}, 处理后的标签路径数量： {len(anno_paths)}")

    img_size = 640

    # Both inputs go through the external `scale` helper before blending;
    # mixup combines them element-wise, so their shapes must be compatible.
    img1 = cv2.imread(img_paths[0])
    boxes1 = load_box(anno_paths[0])
    img1, boxes1 = scale(img1, boxes1, scaleFill=True)

    img2 = cv2.imread(img_paths[1])
    boxes2 = load_box(anno_paths[1])
    img2, boxes2 = scale(img2, boxes2, scaleFill=True)

    mixup_img, mixup_boxes = mixup(img1, boxes1, img2, boxes2)

    show_img_boxes("mixup img", mixup_img, mixup_boxes)

程序运行结果如下：
在这里插入图片描述
Figure5：mixup处理后的图像，图像的高和宽分别是640*640

飞天小老虎66

关注

1
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
计算机视觉技巧合集（四）数据增强之目标检测篇2

mixup方法首先会生成一个随机透明度，然后利用该透明度分别改变2张输入图像的透明度，最后将2张图像重叠起来得到新图像。Figure3：mosaic处理后的图像，图像的高和宽分别是1280*1280。Figure4：mosaic和缩放处理后的图像，图像的高和宽分别是640*640。Figure1：cutout处理后的图像，图像的高和宽分别是375*500。Figure2：图像变形（random_perspective）处理后的图像，图像的高和宽分别是375*500。Figure5：mixup处理后的图像，图像的高和宽分别是640*640。
复制链接

扫一扫