yolov5数据增强、代码解读

cv-daily

已于 2023-02-21 10:52:20 修改

阅读量1.4w

点赞数 9

文章标签：计算机视觉 python 深度学习

于 2021-07-27 17:32:04 首次发布

本文链接：https://blog.csdn.net/weixin_41012399/article/details/119143889

版权

代码解读：
https://blog.csdn.net/qq_55745968/article/details/124512331
yolov5中用到的数据增强方法：
self.mosaic 启用马赛克增强
self.mosaic_border = [-img_size // 2, -img_size // 2] 马赛克增强使用的边界偏移量（两个方向各为 -img_size // 2）
albumentations.Blur(p=0.1), 用一个随机尺寸的核来模糊图片
albumentations.MedianBlur(p=0.1), 使用中值滤波
albumentations.ToGray(p=0.01)], 转化成灰度图
HSV color-space HSV空间的数据增强，包括（H色调，S饱和度，V明度）
img = np.flipud(img) 上下翻转
img = np.fliplr(img) 水平翻转
mixup(img, labels, *load_mosaic(self, random.randint(0, self.n - 1))) Mixup图像混叠增广
focus的具体过程和作用：https://zhuanlan.zhihu.com/p/337147985

在utils/datasets.py里

class LoadImagesAndLabels(Dataset):  # for training/testing
    def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                 cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''):
        # NOTE: excerpt -- only the augmentation-related attributes of the constructor are shown here.
        self.img_size = img_size
        self.augment = augment    # whether data augmentation is applied
        self.hyp = hyp
        self.image_weights = image_weights
        self.rect = False if image_weights else rect  # rect is forced off whenever image_weights is set
        self.mosaic = self.augment and not self.rect  # mosaic is enabled only when augmenting and rect is off
        self.mosaic_border = [-img_size // 2, -img_size // 2]    # negative half of img_size per axis (a value, not an on/off flag); presumably consumed by load_mosaic -- confirm
        self.stride = stride
        self.path = path
        self.albumentations = Albumentations() if augment else None     # Albumentations wrapper instance, created only when augmenting (third-party package, not part of PyTorch)
	...
	...
	    def __getitem__(self, index):
        index = self.indices[index]  # linear, shuffled, or image_weights

        hyp = self.hyp
        mosaic = self.mosaic and random.random() < hyp['mosaic']
        if mosaic:
            # Load mosaic
            img, labels = load_mosaic(self, index)
            shapes = None

            # MixUp augmentation
            if random.random() < hyp['mixup']:
                img, labels = mixup(img, labels, *load_mosaic(self, random.randint(0, self.n - 1)))

        else:
            # Load image
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            labels = self.labels[index].copy()
            if labels.size:  # normalized xywh to pixel xyxy format
                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])

            if self.augment:
                img, labels = random_perspective(img, labels,
                                                 degrees=hyp['degrees'],
                                                 translate=hyp['translate'],
                                                 scale=hyp['scale'],
                                                 shear=hyp['shear'],
                                                 perspective=hyp['perspective'])

        nl = len(labels)  # number of labels
        if nl:
            labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3)

        if self.augment:
            # Albumentations
            img, labels = self.albumentations(img, labels)   ##此处调用utils中的augmentation中的数据增强

            # HSV color-space
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            # Flip up-down
            if random.random() < hyp['flipud']:
                img = np.flipud(img)
                if nl:
                    labels[:, 2] = 1 - labels[:, 2]

            # Flip left-right
            if random.random() < hyp['fliplr']:
                img = np.fliplr(img)
                if nl:
                    labels[:, 1] = 1 - labels[:, 1]

            # Cutouts
            # labels = cutout(img, labels, p=0.5)

        labels_out = torch.zeros((nl, 6))
        if nl:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, self.img_files[index], shapes

utils/augmentations.py 中的 Albumentations 类：

class Albumentations:
    """YOLOv5 Albumentations wrapper (optional, only used if the package is installed)."""

    def __init__(self):
        """Build the transform pipeline; ``self.transform`` stays ``None`` if setup fails."""
        self.transform = None
        try:
            import albumentations as A  # optional dependency, must be installed separately
            check_version(A.__version__, '1.0.3')  # version requirement

            # Only three augmentations are used here: Blur, MedianBlur and ToGray
            self.transform = A.Compose([
                A.Blur(p=0.1),
                A.MedianBlur(p=0.1),
                A.ToGray(p=0.01)],
                bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))

            logging.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p))
        except ImportError:  # package not installed, skip
            pass
        except Exception as e:
            logging.info(colorstr('albumentations: ') + f'{e}')

    def __call__(self, im, labels, p=1.0):
        """Apply the transform to an image and its labels with probability ``p``.

        Args:
            im: Image handed to the transform as ``image``.
            labels: 2-D array; column 0 is passed as the class labels and the remaining columns
                as the boxes (the pipeline declares the 'yolo' box format).
            p: Probability of applying the transform.

        Returns:
            ``(im, labels)``. The inputs are returned unchanged when no transform is available
            or the random draw skips it. Otherwise ``labels`` is rebuilt from the transform
            output as an ``(n, 5)`` array, with ``n == 0`` when no box is returned.
        """
        if self.transform and random.random() < p:
            new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0])  # transformed
            rows = [[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])]
            # np.array([]) has shape (0,); return a 2-D (0, 5) array instead so that
            # column indexing on the result keeps working when no box is returned.
            labels = np.array(rows) if rows else np.zeros((0, 5))
            im = new['image']
        return im, labels

# Example Albumentations pipeline. No bbox_params are passed to Compose, so it transforms images only.
train_transform = albumentations.Compose([
        # Stretch the image to a fixed square size
        albumentations.Resize(RESIZE_SIZE, RESIZE_SIZE),
        # Pick one of three brightness/contrast style adjustments
        albumentations.OneOf([
            albumentations.RandomGamma(gamma_limit=(60, 120), p=0.9),
            albumentations.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.9),
            albumentations.CLAHE(clip_limit=4.0, tile_grid_size=(4, 4), p=0.9),
        ]),
        # With probability 0.5, pick one of three blur variants
        albumentations.OneOf([
            albumentations.Blur(blur_limit=4, p=1),
            albumentations.MotionBlur(blur_limit=4, p=1),
            # Median filtering needs an odd kernel size, so the limit is 3 rather than the even value 4
            albumentations.MedianBlur(blur_limit=3, p=1)
        ], p=0.5),
        albumentations.HorizontalFlip(p=0.5),
        albumentations.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2, rotate_limit=20,
                                        interpolation=cv2.INTER_LINEAR, border_mode=cv2.BORDER_CONSTANT, p=1),
        albumentations.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, p=1.0)
    ])


def mixup(im, labels, im2, labels2):
    """Blend two images and stack their labels (MixUp, https://arxiv.org/pdf/1710.09412.pdf).

    A single mixing ratio is drawn from Beta(32, 32). The images are combined as a weighted sum
    using that ratio and cast to uint8; the two label arrays are concatenated along axis 0.
    """
    ratio = np.random.beta(32.0, 32.0)  # weight of the first image, alpha=beta=32.0
    blended = im * ratio + im2 * (1 - ratio)
    merged_labels = np.concatenate([labels, labels2], axis=0)
    return blended.astype(np.uint8), merged_labels
    Resize就是拉伸图片修改尺寸
    RandomGamma就是使用gamma变换
    RandomBrightnessContrast就是随机选择图片的对比度和亮度
    CLAHE是一种对比度受限情况下的自适应直方图均衡化算法
    blur就是用一个随机尺寸的核来模糊图片，这个尺寸小于等于blur_limit
    MotionBlur和上面的Blur类似，也是使用随机尺寸的核，不过采用运动模糊算法来处理图片。运动模糊简单地说，就是由于摄像机运动而造成的那种带有动感的模糊。
    medianBlur就是使用中值滤波。
    HorizontalFlip水平翻转
    ShiftScaleRotate这个就是平移、缩放、旋转三合一，给力！
    Normalize这个就是图像归一化了。

想要修改yolov5中离线增强数据的方法参考：https://cloud.tencent.com/developer/article/1660972

yolov5中的矩形训练
在这里插入图片描述

可以看到，yolov5会先把图片填充为正方形，再传入网络进行训练。填充区域里有很多冗余信息，会让网络产生很多无意义的候选框。矩形训练就是为了减少这些冗余信息，减少网络产生的无意义的框的数量，从而加快网络训练速度。yolov5网络的总步长为32，所以其实只要图片边长能被32整除就可以了，不一定需要把正方形图片传入网络。矩形训练就是将图片的边长填充到最小的32的倍数，从而减少冗余信息。
在这里插入图片描述

值得一提的是，除了矩形训练，还有矩形推理，也就是在做检测的时候也这样填充，从而加快推理速度，减少推理时间。