代码解读:
https://blog.csdn.net/qq_55745968/article/details/124512331
yolov5中用到的数据增强方法:
self.mosaic 启用马赛克增强
self.mosaic_border = [-img_size // 2, -img_size // 2] 马赛克扩充
albumentations.Blur(p=0.1), 用一个随机尺寸的核来模糊图片
albumentations.MedianBlur(p=0.1), 使用中值滤波
albumentations.ToGray(p=0.01)], 转化成灰度图
HSV color-space HSV空间的数据增强,包括(H色彩,S饱和度,B亮度)
img = np.flipud(img) 上下翻转
img = np.fliplr(img) 水平翻转
mixup(img, labels, *load_mosaic(self, random.randint(0, self.n - 1))) Mixup图像混叠增广
focus的具体过程和作用:https://zhuanlan.zhihu.com/p/337147985
在utils/datasets.py里
class LoadImagesAndLabels(Dataset): # for training/testing
def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''):
self.img_size = img_size
self.augment = augment ##是否采用数据增强
self.hyp = hyp
self.image_weights = image_weights
self.rect = False if image_weights else rect
self.mosaic = self.augment and not self.rect # 是否启用马赛克增强
self.mosaic_border = [-img_size // 2, -img_size // 2] ##是否启用马赛克扩充
self.stride = stride
self.path = path
self.albumentations = Albumentations() if augment else None ##如果数据增强,用pytorch自带的Albumentations()进行数据增强
...
...
def __getitem__(self, index):
index = self.indices[index] # linear, shuffled, or image_weights
hyp = self.hyp
mosaic = self.mosaic and random.random() < hyp['mosaic']
if mosaic:
# Load mosaic
img, labels = load_mosaic(self, index)
shapes = None
# MixUp augmentation
if random.random() < hyp['mixup']:
img, labels = mixup(img, labels, *load_mosaic(self, random.randint(0, self.n - 1)))
else:
# Load image
img, (h0, w0), (h, w) = load_image(self, index)
# Letterbox
shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape
img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
labels = self.labels[index].copy()
if labels.size: # normalized xywh to pixel xyxy format
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])
if self.augment:
img, labels = random_perspective(img, labels,
degrees=hyp['degrees'],
translate=hyp['translate'],
scale=hyp['scale'],
shear=hyp['shear'],
perspective=hyp['perspective'])
nl = len(labels) # number of labels
if nl:
labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3)
if self.augment:
# Albumentations
img, labels = self.albumentations(img, labels) ##此处调用utils中的augmentation中的数据增强
# HSV color-space
augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])
# Flip up-down
if random.random() < hyp['flipud']:
img = np.flipud(img)
if nl:
labels[:, 2] = 1 - labels[:, 2]
# Flip left-right
if random.random() < hyp['fliplr']:
img = np.fliplr(img)
if nl:
labels[:, 1] = 1 - labels[:, 1]
# Cutouts
# labels = cutout(img, labels, p=0.5)
labels_out = torch.zeros((nl, 6))
if nl:
labels_out[:, 1:] = torch.from_numpy(labels)
# Convert
img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
img = np.ascontiguousarray(img)
return torch.from_numpy(img), labels_out, self.img_files[index], shapes
utils中的augmentation.py
class Albumentations:
# YOLOv5 Albumentations class (optional, only used if package is installed)
def __init__(self):
self.transform = None
try:
import albumentations as A ##需要安装albumentations
check_version(A.__version__, '1.0.3') # version requirement
self.transform = A.Compose([
A.Blur(p=0.1),
A.MedianBlur(p=0.1),
A.ToGray(p=0.01)],
bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels'])) ##这里只用到了A.Blur,A.MedianBlur,A.ToGray三种数据增强
logging.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p))
except ImportError: # package not installed, skip
pass
except Exception as e:
logging.info(colorstr('albumentations: ') + f'{e}')
def __call__(self, im, labels, p=1.0):
if self.transform and random.random() < p:
new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0]) # transformed
im, labels = new['image'], np.array([[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])])
return im, labels
train_transform = albumentations.Compose([
albumentations.Resize(RESIZE_SIZE, RESIZE_SIZE),
albumentations.OneOf([
albumentations.RandomGamma(gamma_limit=(60, 120), p=0.9),
albumentations.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.9),
albumentations.CLAHE(clip_limit=4.0, tile_grid_size=(4, 4), p=0.9),
]),
albumentations.OneOf([
albumentations.Blur(blur_limit=4, p=1),
albumentations.MotionBlur(blur_limit=4, p=1),
albumentations.MedianBlur(blur_limit=4, p=1)
], p=0.5),
albumentations.HorizontalFlip(p=0.5),
albumentations.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2, rotate_limit=20,
interpolation=cv2.INTER_LINEAR, border_mode=cv2.BORDER_CONSTANT, p=1),
albumentations.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, p=1.0)
])
def mixup(im, labels, im2, labels2):
# Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf
r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
im = (im * r + im2 * (1 - r)).astype(np.uint8)
labels = np.concatenate((labels, labels2), 0)
return im, labels
Resize就是拉伸图片修改尺寸
RandomGamma就是使用gamma变换
RandomBrightnessContrast就是随机选择图片的对比度和亮度
CLAHE是一种对比度受限情况下的自适应直方图均衡化算法
blur就是用一个随机尺寸的核来模糊图片,这个尺寸小于等于blur_limit
motion blur和上面一样,随机的核,然后使用运动模糊算法来图例图片,运动模糊简单的说就是因为摄像机运动造成的那种模糊的动感。
medianBlur就是使用中值滤波。
HorizontalFlip水平翻转
ShiftScaleRotate这个就是平移缩放旋转三个一,给力!
Normalize这个就是图像归一化了。
想要修改yolov5中离线增强数据的方法参考:https://cloud.tencent.com/developer/article/1660972
yolov5中的矩形训练
可以看到yolov5会对图片进行填充,填充为正方形从而传入网络进行训练,可以看到这里面有很多冗余的信息,会让网络产生很多无意义的候选框,矩形训练就是减少这些冗余信息,减少网络产生的无意义的框的数量,加快网络训练速度。yolov5网络的总步长为32,所以其实只要图片边长能够整除32就可以了,不一定完全需要正方形图片传入网络,矩形训练就是将图片填充为最小的32的倍数边长,从而减小冗余信息。
值得一提的是,除了矩形训练,还有矩形推理,也就是在做检测的时候也这样填充,从而加快推理速度,减少推理时间。
参考:https://blog.csdn.net/Q1u1NG/article/details/107362572
Mixup图像混叠增广参考:https://blog.csdn.net/zandaoguang/article/details/108525787