5. 增加噪声
增加噪声方法较为常用的是cutout方法,该方法会随机生成一些遮盖区域覆盖掉原图像对应的区域。
cutout示例代码如下:
def bbox_ioa(box1, box2):
    """Return how much of each ``box2`` row is covered by ``box1``.

    The result is intersection area divided by the area of the ``box2``
    box (not a symmetric IoU).

    Args:
        box1: A single box given as ``[x1, y1, x2, y2]``.
        box2: Array-like of shape ``(n, 4)`` whose rows are
            ``[x1, y1, x2, y2]``.

    Returns:
        ``np.ndarray`` of shape ``(n,)`` with the covered fraction of every
        ``box2`` row.
    """
    # Transpose so each coordinate unpacks into one (n,) vector.
    box2 = np.asarray(box2).transpose()

    # Top-left and bottom-right corners of both operands.
    b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
    b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]

    # Overlap extent along each axis; negative values mean "no overlap".
    inter_w = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0)
    inter_h = (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)
    inter_area = inter_w * inter_h

    # The small epsilon keeps the division defined for zero-area boxes.
    box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16

    return inter_area / box2_area
def cutout(image, labels):
    """Apply cutout augmentation (https://arxiv.org/abs/1708.04552).

    Random rectangles of decreasing size are painted over ``image`` with a
    random colour. After each sufficiently large rectangle, labels whose
    box is covered by 60% or more are dropped.

    Args:
        image: ``H x W x 3`` array; it is modified IN PLACE.
        labels: Array of shape ``(n, 5+)``; columns ``1:5`` are read as
            ``x1, y1, x2, y2`` in pixels (column 0 is presumably the class
            id -- confirm against ``load_box``).

    Returns:
        The labels that survive the occlusion filter.
    """
    h, w = image.shape[:2]

    # Mask size as a fraction of the image side, repeated per size class.
    scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16
    for s in scales:
        # max(1, ...) keeps the randint range valid when the image side is
        # smaller than 1/s pixels (int(h * s) would otherwise be 0).
        mask_h = random.randint(1, max(1, int(h * s)))
        mask_w = random.randint(1, max(1, int(w * s)))

        # Mask rectangle, clamped to the image bounds.
        xmin = max(0, random.randint(0, w) - mask_w // 2)
        ymin = max(0, random.randint(0, h) - mask_h // 2)
        xmax = min(w, xmin + mask_w)
        ymax = min(h, ymin + mask_h)

        # Fill the rectangle with one random mid-range colour.
        image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]

        # Only masks larger than the smallest size class filter labels.
        if len(labels) and s > 0.03:
            box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
            ioa = bbox_ioa(box, labels[:, 1:5])  # covered fraction of each label
            labels = labels[ioa < 0.60]  # keep boxes less than 60% covered
    return labels
if __name__ == "__main__":
    # Demo: apply cutout to the first sample and display the result.
    img_paths = [r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_002105.jpg", r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000033.jpg",
                 r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000042.jpg", r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000925.jpg",
                 r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000123.jpg"]
    anno_paths = [r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_002105.xml", r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000033.xml",
                  r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000042.xml", r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000925.xml",
                  r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000123.xml"]
    indices = [0, 1, 2, 3, 4]

    img = cv2.imread(img_paths[0])
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise FileNotFoundError(f"cannot read image: {img_paths[0]}")
    boxes = load_box(anno_paths[0])

    # cutout paints the masks into img in place and returns the kept boxes.
    cut_boxes = cutout(img, boxes)
    show_img_boxes("cutout img", img, cut_boxes)
程序运行结果如下:
Figure1:cutout处理后的图像,图像的高和宽分别是375*500
6. 图像变形
图像变形的数据增强方法主要有平移、旋转、扭曲等变换。
图像变形示例代码如下,参考自https://github.com/ultralytics/yolov5/blob/master/utils/augmentations.py#L144:
import math
# perspective is normally left at 0.0, in which case an affine warp is used
def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
                       border=(0, 0), use_segments=True):
    """Randomly rotate, scale, shear, translate and optionally project an image.

    Args:
        img: Input image array of shape ``(H, W, ...)``.
        targets: Array of shape ``(n, 5+)``; columns ``1:5`` are read as
            ``x1, y1, x2, y2`` pixel coordinates. An empty sequence skips the
            box transform.
        segments: Accepted for signature compatibility; not used here.
        degrees: Rotation is drawn uniformly from ``[-degrees, degrees]``.
        translate: Translation is drawn from ``0.5 +/- translate`` of the
            output width/height.
        scale: Scale factor is drawn from ``[1 - scale, 1 + scale]``.
        shear: Shear angle (degrees) is drawn from ``[-shear, shear]`` for
            each axis.
        perspective: Bound of the random projective terms; a falsy value
            selects an affine warp.
        border: ``(y, x)`` padding added on each side of the output canvas.
        use_segments: Selects the area-ratio threshold used to filter boxes
            (0.01 when true, 0.10 otherwise).

    Returns:
        ``(img, targets)``: the warped image and the transformed, filtered
        targets.
    """
    # Output canvas size.
    height = img.shape[0] + border[0] * 2
    width = img.shape[1] + border[1] * 2

    # C: move the image centre to the origin (0, 0).
    C = np.eye(3)
    C[0, 2] = -img.shape[1] / 2  # x translation (pixels)
    C[1, 2] = -img.shape[0] / 2  # y translation (pixels)

    # P: random projective terms.
    P = np.eye(3)
    P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
    P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)

    # R: random rotation by angle a combined with random scale s.
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    s = random.uniform(1 - scale, 1 + scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

    # S: random shear along each axis.
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # T: random translation around the centre of the output canvas.
    T = np.eye(3)
    T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)

    # Combined matrix; the factors are applied right to left, so order matters.
    M = T @ S @ R @ P @ C
    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
        if perspective:
            img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))
        else:
            img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))

    # Transform the ground-truth boxes with the same matrix.
    n = len(targets)
    if n:
        # Four corners per box in homogeneous coordinates.
        xy = np.ones((n * 4, 3))
        xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        xy = xy @ M.T
        # A projective warp needs the divide by w; an affine warp does not.
        xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8)

        # Axis-aligned bounding rectangle of the warped corners.
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # Clamp the coordinates to the output canvas.
        new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
        new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)

        # Drop boxes that became too small, too thin or lost too much area.
        i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
        targets = targets[i]
        targets[:, 1:5] = new[i]
    return img, targets
# 通过设定高、宽的阈值,设定高宽比的阈值,设定区域面积比的阈值来筛选可以使用的框
# eps是为了防止除以0
def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
    """Select the boxes that are still usable after augmentation.

    Args:
        box1: Boxes before augmentation, shape ``(4, n)`` as
            ``x1, y1, x2, y2`` rows.
        box2: Boxes after augmentation, same layout.
        wh_thr: Minimum width and height (pixels) of an augmented box.
        ar_thr: Maximum aspect ratio of an augmented box.
        area_thr: Minimum ratio of augmented area to original area.
        eps: Small constant that guards the divisions against zero.

    Returns:
        Boolean ``np.ndarray`` of shape ``(n,)``; ``True`` marks a box that
        passes every threshold.
    """
    box1 = np.asarray(box1)
    box2 = np.asarray(box2)
    w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
    w2, h2 = box2[2] - box2[0], box2[3] - box2[1]

    # Aspect ratio, taken as the larger of w/h and h/w.
    ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))

    big_enough = (w2 > wh_thr) & (h2 > wh_thr)
    kept_area = w2 * h2 / (w1 * h1 + eps) > area_thr
    return big_enough & kept_area & (ar < ar_thr)
if __name__ == "__main__":
    # Demo: apply the random perspective transform to the first sample.
    img_paths = [r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_002105.jpg", r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000033.jpg",
                 r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000042.jpg", r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000925.jpg",
                 r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000123.jpg"]
    anno_paths = [r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_002105.xml", r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000033.xml",
                  r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000042.xml", r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000925.xml",
                  r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000123.xml"]
    indices = [0, 1, 2, 3, 4]

    # The braces are required: without them the f-string prints the literal
    # text "len(img_paths)" instead of the count.
    print(f"原图像路径数量:{len(img_paths)}, 原标签路径数量: {len(anno_paths)}")
    # NOTE(review): remove_img presumably prunes both lists in place -- confirm.
    remove_img(img_paths, anno_paths)
    print(f"处理后的图像路径数量:{len(img_paths)}, 处理后的标签路径数量: {len(anno_paths)}")

    img_size = 640
    img = cv2.imread(img_paths[0])
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise FileNotFoundError(f"cannot read image: {img_paths[0]}")
    boxes = load_box(anno_paths[0])
    perspective_img, perspective_boxes = random_perspective(img, boxes)
    show_img_boxes("perspective img", perspective_img, perspective_boxes)
程序运行结果如下:
Figure2:图像变形(random_perspective)处理后的图像,图像的高和宽分别是375*500
7. 拼接、合并图像
拼接、合并图像中常用的数据增强方法有mosaic和mixup这2种方法。
mosaic数据增强方法
mosaic方法会从数据集中随机选取4张图像,将其拼接在一起,从而极大地增加了样本的多样性和复杂度。
mosaic示例代码如下,参考自https://github.com/ultralytics/yolov5/blob/master/utils/dataloaders.py#L751:
# 读取图像
def load_img(img_path, img_size):
    """Read an image and resize it so its longer side equals ``img_size``.

    Args:
        img_path: Path of the image file.
        img_size: Target length (pixels) of the longer image side.

    Returns:
        ``(img, h, w, resized_hw)``: the resized image, the original height
        and width, and the ``(height, width)`` of the resized image.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_path``.
    """
    img = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise FileNotFoundError(f"cannot read image: {img_path}")

    h, w = img.shape[0], img.shape[1]
    r = img_size / max(h, w)
    if r != 1:
        # INTER_AREA suits shrinking; INTER_LINEAR is the better choice
        # when the image has to be enlarged.
        interp = cv2.INTER_LINEAR if r > 1 else cv2.INTER_AREA
        img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=interp)
    return img, h, w, img.shape[:2]
# mosaic(拼接)
def mosaic(img_size, img_paths, anno_paths, index, indices):
    """Stitch four images into one ``2*img_size x 2*img_size`` mosaic.

    The image at ``index`` plus three images sampled from ``indices`` are
    placed around a random centre point on a grey canvas, and their boxes
    are shifted into canvas coordinates.

    Args:
        img_size: Longer side each tile is resized to; the canvas is twice
            this size.
        img_paths: Image file paths.
        anno_paths: Annotation file paths, parallel to ``img_paths``.
        index: Index of the image that is always included.
        indices: Candidate indices for the other three tiles (at least
            three are needed by ``random.sample``).

    Returns:
        ``(img4, label4)``: the mosaic canvas and the concatenated labels,
        with columns ``1:`` clipped to the canvas.
    """
    canvas_size = img_size * 2
    mosaic_border = [-img_size // 2, -img_size // 2]
    # Random mosaic centre, kept away from the canvas edge by the border.
    yc, xc = (int(random.uniform(-x, 2 * img_size + x)) for x in mosaic_border)
    index4 = [index] + random.sample(indices, 3)
    label4 = []
    img4 = np.full((canvas_size, canvas_size, 3), 114, dtype=np.uint8)

    for i, idx in enumerate(index4):
        img, origin_h, origin_w, (scale_h, scale_w) = load_img(img_paths[idx], img_size)
        boxes = load_box(anno_paths[idx])
        h, w = img.shape[0], img.shape[1]

        # (x1a, y1a, x2a, y2a): destination on the canvas.
        # (x1b, y1b, x2b, y2b): region cropped from the tile.
        if i == 0:  # top-left of the centre
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h
        elif i == 1:  # top-right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, canvas_size), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom-left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(yc + h, canvas_size)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif i == 3:  # bottom-right
            x1a, y1a, x2a, y2a = xc, yc, min(w + xc, canvas_size), min(yc + h, canvas_size)
            x1b, y1b, x2b, y2b = 0, 0, min(x2a - x1a, w), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]

        # Offset between tile coordinates and canvas coordinates.
        padw = x1a - x1b
        padh = y1a - y1b

        # Per-axis resize ratios: int() rounding in load_img can make the
        # two axes differ slightly, so each axis uses its own ratio.
        ratio_w = scale_w / origin_w
        ratio_h = scale_h / origin_h
        boxes_pad = boxes.copy()
        boxes_pad[:, 1] = boxes[:, 1] * ratio_w + padw
        boxes_pad[:, 2] = boxes[:, 2] * ratio_h + padh
        boxes_pad[:, 3] = boxes[:, 3] * ratio_w + padw
        boxes_pad[:, 4] = boxes[:, 4] * ratio_h + padh
        label4.append(boxes_pad)

    label4 = np.concatenate(label4, 0)
    # Clip to the actual canvas size rather than a hard-coded 640 * 2.
    np.clip(label4[:, 1:], 0, canvas_size, out=label4[:, 1:])
    return img4, label4
if __name__ == "__main__":
    # Demo: build a mosaic, then also show it scaled down.
    img_paths = [r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_002105.jpg", r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000033.jpg",
                 r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000042.jpg", r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000925.jpg",
                 r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000123.jpg"]
    anno_paths = [r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_002105.xml", r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000033.xml",
                  r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000042.xml", r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000925.xml",
                  r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000123.xml"]
    indices = [0, 1, 2, 3, 4]

    # The braces are required: without them the f-string prints the literal
    # text "len(img_paths)" instead of the count.
    print(f"原图像路径数量:{len(img_paths)}, 原标签路径数量: {len(anno_paths)}")
    # NOTE(review): remove_img presumably prunes both lists in place -- confirm.
    remove_img(img_paths, anno_paths)
    print(f"处理后的图像路径数量:{len(img_paths)}, 处理后的标签路径数量: {len(anno_paths)}")

    img_size = 640
    orgin_img, orgin_boxes = mosaic(img_size, img_paths, anno_paths, 1, indices)
    scale_img, scale_boxes = scale(orgin_img, orgin_boxes, scaleFill=True)
    show_img_boxes("mosaic img", orgin_img, orgin_boxes)
    show_img_boxes("mosaic&scale img", scale_img, scale_boxes)
程序运行结果如下:
![在这里插入图片描述](https://img-blog.csdnimg.cn/eb43a9c91d9b4f079ee614c7f4ef1974.jpeg#pic_center)
Figure3:mosaic处理后的图像,图像的高和宽分别是1280*1280
![在这里插入图片描述](https://img-blog.csdnimg.cn/d2307eda4cfd43ca98e8f953c9e9de5e.jpeg#pic_center)
Figure4:mosaic和缩放处理后的图像,图像的高和宽分别是640*640
mixup数据增强方法
mixup方法首先会生成一个随机透明度,然后利用该透明度分别改变2张输入图像的透明度,最后将2张图像重叠起来得到新图像。
mixup示例代码如下,参考自https://github.com/ultralytics/yolov5/blob/master/utils/augmentations.py#L289:
# mixup
def mixup(img1, label1, img2, label2):
    """Blend two images with a random weight and merge their labels.

    Args:
        img1: First image array.
        label1: Labels of ``img1``, shape ``(n1, k)``.
        img2: Second image array; must have the same shape as ``img1``.
        label2: Labels of ``img2``, shape ``(n2, k)``.

    Returns:
        ``(img, labels)``: the blended ``uint8`` image and the labels of
        both inputs concatenated along axis 0.

    Raises:
        ValueError: If the two images differ in shape.
    """
    # Reject mismatched inputs explicitly: NumPy would otherwise either fail
    # with an opaque broadcast error or silently broadcast a size-1 axis.
    if img1.shape != img2.shape:
        raise ValueError(
            f"mixup needs images of equal shape, got {img1.shape} and {img2.shape}"
        )

    # Blend weight drawn from Beta(32, 32).
    r = np.random.beta(32.0, 32.0)
    img = (img1 * r + img2 * (1 - r)).astype(np.uint8)
    labels = np.concatenate((label1, label2), 0)
    return img, labels
if __name__ == "__main__":
    # Demo: scale two samples to a common size and blend them with mixup.
    img_paths = [r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_002105.jpg", r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000033.jpg",
                 r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000042.jpg", r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000925.jpg",
                 r"G:\datasets\VOCdevkit\VOC2012\JPEGImages\2007_000123.jpg"]
    anno_paths = [r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_002105.xml", r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000033.xml",
                  r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000042.xml", r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000925.xml",
                  r"G:\datasets\VOCdevkit\VOC2012\Annotations\2007_000123.xml"]
    indices = [0, 1, 2, 3, 4]

    # The braces are required: without them the f-string prints the literal
    # text "len(img_paths)" instead of the count.
    print(f"原图像路径数量:{len(img_paths)}, 原标签路径数量: {len(anno_paths)}")
    # NOTE(review): remove_img presumably prunes both lists in place -- confirm.
    remove_img(img_paths, anno_paths)
    print(f"处理后的图像路径数量:{len(img_paths)}, 处理后的标签路径数量: {len(anno_paths)}")

    img_size = 640

    img1 = cv2.imread(img_paths[0])
    # cv2.imread signals failure by returning None instead of raising.
    if img1 is None:
        raise FileNotFoundError(f"cannot read image: {img_paths[0]}")
    boxes1 = load_box(anno_paths[0])
    img1, boxes1 = scale(img1, boxes1, scaleFill=True)

    img2 = cv2.imread(img_paths[1])
    if img2 is None:
        raise FileNotFoundError(f"cannot read image: {img_paths[1]}")
    boxes2 = load_box(anno_paths[1])
    img2, boxes2 = scale(img2, boxes2, scaleFill=True)

    mixup_img, mixup_boxes = mixup(img1, boxes1, img2, boxes2)
    show_img_boxes("mixup img", mixup_img, mixup_boxes)
程序运行结果如下:
Figure5:mixup处理后的图像,图像的高和宽分别是640*640