数据增强的目的是人工扩充样本,提高模型的鲁棒性,降低过拟合风险。常用方法有:random erasing、cutout、hide-and-seek、grid mask、Adversarial Erasing、mixup、cutmix、mosaic、Stylized-ImageNet、label smoothing 和 dropblock。
1、random erasing
"""
re=w/h
s=w*h=re*h*h=w*w/re
h=sqrt(s/re)
w=sqrt(s*re)
"""
def random_erase(img, p=0.8, sl=0.02, sh=0.5, re=0.3, max_attempts=100):
    """Overwrite one random rectangle of *img* with uniform random noise.

    The rectangle is found by rejection sampling: an area fraction is drawn
    from [sl, sh], width and height are derived from it with the fixed
    aspect ratio ``re = width / height``, a top-left corner is drawn, and
    the candidate is accepted only if it lies completely inside the image.

    Args:
        img: Array indexed as (row, column[, channel]). It is modified in
            place; pass a copy to keep the original.
        p: Probability of attempting the erase at all.
        sl: Lower bound of the erased area as a fraction of the image area.
        sh: Upper bound of the erased area as a fraction of the image area.
        re: Width / height ratio of the erased rectangle.
        max_attempts: Number of candidates tried before giving up, so that
            an image no rectangle can fit into does not loop forever.

    Returns:
        The same array object, erased if a rectangle was accepted and
        unchanged otherwise.
    """
    # Imported locally so the function is self-contained: the visible file
    # uses `math` without importing it and imports the others further down.
    import math
    import random

    import numpy as np

    if random.uniform(0, 1) > p:
        return img

    height, width = img.shape[:2]
    img_area = height * width
    for _ in range(max_attempts):
        erase_area = random.uniform(sl, sh) * img_area
        we = int(math.sqrt(erase_area * re))
        he = int(math.sqrt(erase_area / re))
        if we == 0 or he == 0:
            # Truncation produced an empty rectangle; draw another one.
            continue
        xe = random.randint(0, width)
        ye = random.randint(0, height)
        if xe + we <= width and ye + he <= height:
            # img.shape[2:] is empty for 2-D (grayscale) input, so the same
            # code serves both layouts. The upper bound of randint is
            # exclusive, hence 256 to cover the full 0..255 range.
            patch_shape = (he, we) + tuple(img.shape[2:])
            img[ye:ye + he, xe:xe + we] = np.random.randint(0, 256, size=patch_shape)
            return img
    return img
# Demo: read a sample image, apply random erasing to a copy and display it.
img = cv2.imread("D:/data/testpic/1.png")
if img is None:
    # cv2.imread reports an unreadable or missing file by returning None
    # rather than raising, so fail here with the offending path.
    raise FileNotFoundError("D:/data/testpic/1.png")
print(img.shape[0], img.shape[1])
# random_erase modifies its argument in place, so hand it a copy.
img1 = random_erase(img.copy())
cv2.imshow('test', img1)
cv2.waitKey()
2、CutMix
https://github.com/clovaai/CutMix-PyTorch
3、mosaic数据增强
目的:将4张图片合成为一张图片,并整合它们的标签。
首先创建一个空的画布,在某一范围内随机选择一个点作为mosaic图片的中心点(xc,yc),以该中心点可将画布分为4块
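其中各块在画布上的坐标(x1a,y1a,x2a,y2a)表示如下(W、H为对应图片的宽和高,画布边长为2s):左上块为(max(xc-W,0), max(yc-H,0), xc, yc);右上块为(xc, max(yc-H,0), min(xc+W,2s), yc);左下块为(max(xc-W,0), yc, xc, min(yc+H,2s));右下块为(xc, yc, min(xc+W,2s), min(yc+H,2s))。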
考虑到4块区域与4张图宽高不匹配,可能需要截取图中一部分并合成。
合成后标签框的位置会发生变化:先计算图片在画布上的水平和垂直偏移量(padw,padh),即画布区域左上角与原图截取区域左上角之差,再把偏移量加到原标签框的像素坐标上,得到标签框在合成图片中的位置。
import os
import cv2
import numpy as np
import random
def _mosaic_placement(idx, xc, yc, W, H, s):
    """Return the canvas and source rectangles for tile *idx* of a mosaic.

    Args:
        idx: Tile position: 0 top-left, 1 top-right, 2 bottom-left,
            3 bottom-right.
        xc, yc: Mosaic centre on the canvas.
        W, H: Width and height of the image being placed.
        s: Half of the canvas side length (the canvas is 2s x 2s).

    Returns:
        Two tuples ``(x1a, y1a, x2a, y2a)`` and ``(x1b, y1b, x2b, y2b)``:
        the destination rectangle on the canvas and the equally sized crop
        taken from the image.
    """
    if idx == 0:
        # Top-left: the image's bottom-right corner sits on the centre.
        x1a, y1a, x2a, y2a = max(xc - W, 0), max(yc - H, 0), xc, yc
        x1b, y1b, x2b, y2b = W - (x2a - x1a), H - (y2a - y1a), W, H
    elif idx == 1:
        # Top-right: the image's bottom-left corner sits on the centre.
        x1a, y1a, x2a, y2a = xc, max(yc - H, 0), min(xc + W, s * 2), yc
        x1b, y1b, x2b, y2b = 0, H - (y2a - y1a), min(W, x2a - x1a), H
    elif idx == 2:
        # Bottom-left: the crop ends at the image's right edge W (the
        # previous bound max(xc, W) could exceed the image width).
        x1a, y1a, x2a, y2a = max(xc - W, 0), yc, xc, min(yc + H, s * 2)
        x1b, y1b, x2b, y2b = W - (x2a - x1a), 0, W, min(y2a - y1a, H)
    else:
        # Bottom-right: the image's top-left corner sits on the centre.
        x1a, y1a, x2a, y2a = xc, yc, min(xc + W, s * 2), min(yc + H, s * 2)
        x1b, y1b, x2b, y2b = 0, 0, min(W, x2a - x1a), min(y2a - y1a, H)
    return (x1a, y1a, x2a, y2a), (x1b, y1b, x2b, y2b)


def load_img_label(trainpath, size=416):
    """Build 2x2 mosaics from listed images, draw their boxes and show them.

    *trainpath* is a text file with one image path per line. The label file
    of an image is found by replacing ``images`` with ``labels`` in its path
    and swapping the extension for ``.txt``. Each label line is split on
    whitespace; fields 1-4 are read as box centre x, centre y, width and
    height expressed as fractions of the image size (YOLO-style rows --
    field 0 is not used here).

    Images are consumed four at a time. For every group a gray canvas of
    ``2*size`` x ``2*size`` is created, a random centre is drawn, the four
    images are cropped into the four quadrants around it, the shifted boxes
    are drawn, and the result is shown in a window, written to
    ``./res.jpg`` and held until a key is pressed. A trailing group with
    fewer than four images is skipped.

    Args:
        trainpath: Path of the image list file.
        size: Half of the mosaic canvas side length.

    Raises:
        FileNotFoundError: If a label file is missing or an image cannot be
            read.
    """
    # Kept from the original so the canvas and the last placement remain
    # available as module-level names after the call.
    global img4, y1a, y2a, x1a, x2a, y1b, y2b, x2b, x1b

    with open(trainpath, 'r') as f:
        # Blank lines would otherwise become empty paths.
        imgpath = [line.strip() for line in f if line.strip()]
    # splitext swaps only the real extension; a substring replace would also
    # rewrite "jpg"/"png" occurring in directory names.
    labelpath = [os.path.splitext(p.replace('images', 'labels'))[0] + '.txt' for p in imgpath]
    filename = [os.path.splitext(os.path.split(fn)[1])[0] for fn in imgpath]
    print(imgpath)
    print(labelpath)
    print(filename)

    s = size
    for start in range(0, len(imgpath), 4):
        # The three lists are index-aligned, so slicing selects matching
        # image/label pairs without searching by file stem.
        group_imgs = imgpath[start:start + 4]
        group_lbls = labelpath[start:start + 4]
        if len(group_imgs) < 4:
            print('skipping incomplete mosaic group:', group_imgs)
            break

        labels = []
        for lb_path in group_lbls:
            with open(lb_path, 'r') as f:
                labels.append([row.split() for row in f if row.strip()])

        # Mosaic centre, drawn from the middle half of the canvas.
        xc, yc = [int(random.uniform(s * 0.5, s * 1.5)) for _ in range(2)]
        for idx, path in enumerate(group_imgs):
            img = cv2.imread(path)
            if img is None:
                # cv2.imread returns None instead of raising on failure.
                raise FileNotFoundError(path)
            H, W = img.shape[:2]
            if idx == 0:
                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)
            (x1a, y1a, x2a, y2a), (x1b, y1b, x2b, y2b) = _mosaic_placement(idx, xc, yc, W, H, s)
            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]

            # Offset of the image's origin on the canvas; adding it moves a
            # box from image coordinates to canvas coordinates.
            padw = x1a - x1b
            padh = y1a - y1b
            for lb in labels[idx]:
                if len(lb) < 5:
                    continue
                x, y, w, h = (float(v) for v in lb[1:5])
                x1, y1 = int((x - w / 2) * W) + padw, int((y - h / 2) * H) + padh
                x2, y2 = int((x + w / 2) * W) + padw, int((y + h / 2) * H) + padh
                cv2.rectangle(img4, (x1, y1), (x2, y2), (255, 0, 255), 1)

        cv2.imshow('test', img4)
        cv2.imwrite('./res.jpg', img4)
        cv2.waitKey()
# imgpath='D:/data/Mydata/images'
# Text file listing one image path per line; load_img_label reads it and
# builds the mosaic from the listed images.
trainpath='D:/data/Mydata/train.txt'
load_img_label(trainpath)
以上实现还存在一些缺点:有些标签框在合成后只有一部分落在截取区域内,应尽量将这类标签框剔除或裁剪。此外,合成后的图片还需要进行翻转、缩放、色域变换等操作。
未完待续。。。
参考:
https://zhuanlan.zhihu.com/p/41679153
https://www.cnblogs.com/monologuesmw/p/12932407.html