Preface
Hahaha! I'm back with data augmentation again!
Code Download
https://github.com/bubbliiiing/object-detection-augmentation
What Is the Mosaic Data Augmentation Method
The mosaic data augmentation in YOLOv4 draws on the CutMix data augmentation method, and the two are conceptually similar.
CutMix splices two images together.
Mosaic, however, uses four images. According to the paper, its major advantage is that it enriches the background of the detected objects; in addition, when Batch Normalization statistics are computed, data from four images is taken into account at once.
As shown in the figure below:
Implementation Approach
1. Read four images at a time.
2. Flip, scale, and color-jitter each of the four images, and arrange them in the four corner positions.
3. Combine the images and combine the boxes (a simplified sketch of this step follows).
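Before the full code, here is a minimal sketch of step 3. This is my own simplification: simple_mosaic and imgs are hypothetical names, not part of the repository. It assumes each of the four input arrays is already canvas-sized, with its picture pasted into the corner it will contribute, which is exactly what the full code below prepares with new_image.paste.

import numpy as np

def simple_mosaic(imgs, cutx, cuty):
    # imgs: four canvas-sized HxWx3 uint8 arrays, each with its picture
    # already pasted into the corner it will contribute.
    h, w = imgs[0].shape[:2]
    canvas = np.zeros((h, w, 3), dtype=np.uint8)
    canvas[:cuty, :cutx] = imgs[0][:cuty, :cutx]   # top-left quadrant
    canvas[cuty:, :cutx] = imgs[1][cuty:, :cutx]   # bottom-left quadrant
    canvas[cuty:, cutx:] = imgs[2][cuty:, cutx:]   # bottom-right quadrant
    canvas[:cuty, cutx:] = imgs[3][:cuty, cutx:]   # top-right quadrant
    return canvas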
Full Code
The code here will not work if you simply copy and paste it; please download it from GitHub.
1. Data Augmentation
This part implements the data augmentation.
import cv2
import numpy as np
from PIL import Image, ImageDraw

def rand(a=0, b=1):
    return np.random.rand()*(b-a) + a

def merge_bboxes(bboxes, cutx, cuty):
    # Clip the boxes of each sub-image to the quadrant that sub-image
    # contributes to the mosaic, and drop boxes that fall outside it.
    # i = 0: top-left, i = 1: bottom-left, i = 2: bottom-right, i = 3: top-right.
    merge_bbox = []
    for i in range(len(bboxes)):
        for box in bboxes[i]:
            tmp_box = []
            x1, y1, x2, y2 = box[0], box[1], box[2], box[3]

            if i == 0:
                if y1 > cuty or x1 > cutx:
                    continue
                if y2 >= cuty and y1 <= cuty:
                    y2 = cuty
                if x2 >= cutx and x1 <= cutx:
                    x2 = cutx

            if i == 1:
                if y2 < cuty or x1 > cutx:
                    continue
                if y2 >= cuty and y1 <= cuty:
                    y1 = cuty
                if x2 >= cutx and x1 <= cutx:
                    x2 = cutx

            if i == 2:
                if y2 < cuty or x2 < cutx:
                    continue
                if y2 >= cuty and y1 <= cuty:
                    y1 = cuty
                if x2 >= cutx and x1 <= cutx:
                    x1 = cutx

            if i == 3:
                if y1 > cuty or x2 < cutx:
                    continue
                if y2 >= cuty and y1 <= cuty:
                    y2 = cuty
                if x2 >= cutx and x1 <= cutx:
                    x1 = cutx
            tmp_box.append(x1)
            tmp_box.append(y1)
            tmp_box.append(x2)
            tmp_box.append(y2)
            tmp_box.append(box[-1])
            merge_bbox.append(tmp_box)
    return merge_bbox
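# ------------------------------------------------------------------
# Worked example (not part of the original script): with the cut point
# at (cutx, cuty) = (320, 320), a box of sub-image 0 (top-left) that
# crosses the cut is clipped to its quadrant:
#   merge_bboxes([[[300, 300, 400, 400, 0]], [], [], []], 320, 320)
#   -> [[300, 300, 320, 320, 0]]
# A box lying entirely outside its sub-image's quadrant is dropped.
# ------------------------------------------------------------------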
def get_random_data_with_Mosaic(annotation_line, input_shape, jitter=0.3, hue=.1, sat=0.7, val=0.4):
    h, w = input_shape
    min_offset_x = rand(0.3, 0.7)
    min_offset_y = rand(0.3, 0.7)

    image_datas = []
    box_datas   = []
    index       = 0
    for line in annotation_line:
        #---------------------------------#
        #   Split each annotation line
        #---------------------------------#
        line_content = line.split()
        #---------------------------------#
        #   Open the image
        #---------------------------------#
        image = Image.open(line_content[0])
        image = image.convert('RGB')

        #---------------------------------#
        #   Size of the image
        #---------------------------------#
        iw, ih = image.size
        #---------------------------------#
        #   Save the box positions
        #---------------------------------#
        box = np.array([np.array(list(map(int, box.split(',')))) for box in line_content[1:]])

        #---------------------------------#
        #   Whether to flip the image
        #---------------------------------#
        flip = rand() < .5
        if flip and len(box) > 0:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)
            box[:, [0, 2]] = iw - box[:, [2, 0]]

        #------------------------------------------#
        #   Resize the image and distort its
        #   width and height
        #------------------------------------------#
        new_ar = iw/ih * rand(1-jitter, 1+jitter) / rand(1-jitter, 1+jitter)
        scale = rand(.4, 1)
        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)

        #-----------------------------------------------#
        #   Place the image; each index corresponds to
        #   one of the four sub-image positions
        #-----------------------------------------------#
        if index == 0:
            dx = int(w * min_offset_x) - nw
            dy = int(h * min_offset_y) - nh
        elif index == 1:
            dx = int(w * min_offset_x) - nw
            dy = int(h * min_offset_y)
        elif index == 2:
            dx = int(w * min_offset_x)
            dy = int(h * min_offset_y)
        elif index == 3:
            dx = int(w * min_offset_x)
            dy = int(h * min_offset_y) - nh

        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image_data = np.array(new_image)

        index = index + 1
        box_data = []
        #---------------------------------#
        #   Re-process the boxes
        #---------------------------------#
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]]*nw/iw + dx
            box[:, [1, 3]] = box[:, [1, 3]]*nh/ih + dy
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]
            box_data = np.zeros((len(box), 5))
            box_data[:len(box)] = box

        image_datas.append(image_data)
        box_datas.append(box_data)

    #---------------------------------#
    #   Cut the images and stitch
    #   the pieces together
    #---------------------------------#
    cutx = int(w * min_offset_x)
    cuty = int(h * min_offset_y)

    new_image = np.zeros([h, w, 3])
    new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
    new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
    new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
    new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]

    new_image = np.array(new_image, np.uint8)
    #---------------------------------#
    #   Apply HSV color-space jitter
    #   Compute the jitter parameters
    #---------------------------------#
    r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
    #---------------------------------#
    #   Convert the image to HSV
    #---------------------------------#
    hue, sat, val = cv2.split(cv2.cvtColor(new_image, cv2.COLOR_RGB2HSV))
    dtype = new_image.dtype
    #---------------------------------#
    #   Apply the transform
    #---------------------------------#
    x = np.arange(0, 256, dtype=r.dtype)
    lut_hue = ((x * r[0]) % 180).astype(dtype)
    lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
    lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

    new_image = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
    new_image = cv2.cvtColor(new_image, cv2.COLOR_HSV2RGB)

    #---------------------------------#
    #   Further process the boxes
    #---------------------------------#
    new_boxes = merge_bboxes(box_datas, cutx, cuty)

    return new_image, new_boxes
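Based purely on how get_random_data_with_Mosaic parses its input (line.split() for the image path, then comma-separated box fields), each element of annotation_line is a string of the form "image_path x1,y1,x2,y2,class_id ...". A hand-written example with hypothetical file names:

annotation_line = [
    "images/cat_1.jpg 48,240,195,371,0",
    "images/dog_1.jpg 12,30,200,180,1 210,40,380,220,1",
    "images/cat_2.jpg 60,60,300,300,0",
    "images/dog_2.jpg 5,5,120,160,1",
]
image_data, box_data = get_random_data_with_Mosaic(annotation_line, [640, 640])

The calling code below builds exactly these lines from VOC XML annotations via convert_annotation.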
2. Calling Code
This part is the calling code.
import os
from random import sample

import numpy as np
from PIL import Image, ImageDraw

from utils.random_data import get_random_data, get_random_data_with_Mosaic
from utils.utils import convert_annotation, get_classes

#-----------------------------------------------------------------------------------#
#   Origin_VOCdevkit_path   path to the original dataset
#-----------------------------------------------------------------------------------#
Origin_VOCdevkit_path   = "VOCdevkit_Origin"
#-----------------------------------------------------------------------------------#
#   input_shape             size of the generated image
#-----------------------------------------------------------------------------------#
input_shape             = [640, 640]

if __name__ == "__main__":
    Origin_JPEGImages_path  = os.path.join(Origin_VOCdevkit_path, "VOC2007/JPEGImages")
    Origin_Annotations_path = os.path.join(Origin_VOCdevkit_path, "VOC2007/Annotations")

    #---------------------------#
    #   Walk through the labels
    #---------------------------#
    xml_names = os.listdir(Origin_Annotations_path)

    #------------------------------#
    #   Pick 4 images and labels
    #------------------------------#
    sample_xmls     = sample(xml_names, 4)
    unique_labels   = get_classes(sample_xmls, Origin_Annotations_path)

    annotation_line = []
    for xml in sample_xmls:
        line = convert_annotation(os.path.join(Origin_JPEGImages_path, os.path.splitext(xml)[0] + '.jpg'), os.path.join(Origin_Annotations_path, xml), unique_labels)
        annotation_line.append(line)

    #------------------------------#
    #   Build the mosaic
    #------------------------------#
    image_data, box_data = get_random_data_with_Mosaic(annotation_line, input_shape)

    img = Image.fromarray(image_data.astype(np.uint8))
    for j in range(len(box_data)):
        thickness = 3
        left, top, right, bottom = box_data[j][0:4]
        draw = ImageDraw.Draw(img)
        for i in range(thickness):
            draw.rectangle([left + i, top + i, right - i, bottom - i], outline=(255, 255, 255))
    img.show()
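If you want to keep the result instead of only displaying it, PIL's Image.save can write it to disk, e.g. img.save("mosaic_result.jpg") (the file name here is just an example).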