影像裁剪及数据扩充(针对DOTAv1.5进行部分优化)
概述:不进行重叠裁剪(重叠裁剪会大量增加小汽车等小目标的数量,造成数量差距过大);影像中心裁剪一张(通常目标会出现在影像中间附近,保留在影像中心的较大目标不被裁剪)。
1.加影像中心裁剪的裁剪算法
caijian.py代码如下:
# -*- coding: utf-8 -*-
import cv2
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
def _save_tile(path_out, name, star_h, star_w, tile):
    """Write *tile* as ``<name>_<star_h>_<star_w>.png`` inside *path_out*."""
    name_img = name + '_' + str(star_h) + '_' + str(star_w)
    cv2.imwrite(os.path.join(path_out, name_img + '.png'), tile)


def _center_start(length, window):
    """Return the start offset of a *window*-long span centred on an axis.

    Returns 0 when the axis is shorter than the window; the caller pads the
    image on that axis so the span ``[0, window)`` exists.
    """
    if length < window:
        return 0
    return int(int(length / 2) - window / 2)


def caijian(path, path_out, size_w=1024, size_h=1024, step=768):
    """Cut every image in *path* into fixed-size tiles plus one centre tile.

    Each image is padded on the right/bottom with grey (113, 113, 113) and
    cut on a regular grid; tiles are saved to *path_out* as
    ``<image name>_<top>_<left>.png``. In addition, when the image is at
    least one tile high or one tile wide, a single tile centred on the image
    is written (the short axis, if any, is padded first).

    Args:
        path: Directory containing the source images.
        path_out: Directory receiving the tiles (must already exist).
        size_w: Tile width in pixels.
        size_h: Tile height in pixels.
        step: Grid stride in pixels; ``step == size`` means no overlap.
    """
    pad_value = (113, 113, 113)
    count = 0
    for im_list in os.listdir(path):
        name = os.path.splitext(im_list)[0]
        print(name)
        # Read from the directory passed in, not from a module-level global.
        img = cv2.imread(os.path.join(path, im_list))
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            print('skip {}: cannot be read as an image'.format(im_list))
            continue
        size = img.shape
        height, width = size[0], size[1]

        # Pad so that the last tile of every grid row/column is complete.
        img0 = cv2.copyMakeBorder(
            img, 0, size_h - height % step, 0, size_w - width % step,
            cv2.BORDER_CONSTANT, value=pad_value)
        size0 = img0.shape
        count += 1

        # Regular grid. max(..., 1) guarantees at least one tile even when
        # the image is smaller than the overlap (size - step).
        number = 0
        for star_h in range(0, max(height - (size_h - step), 1), step):
            for star_w in range(0, max(width - (size_w - step), 1), step):
                tile = img0[star_h:star_h + size_h, star_w:star_w + size_w]
                _save_tile(path_out, name, star_h, star_w, tile)
                number += 1

        # One extra tile centred on the image. Skipped when the image is
        # smaller than a tile on both axes.
        numberz = 0
        if height >= size_h or width >= size_w:
            padded = img
            if height < size_h or width < size_w:
                padded = cv2.copyMakeBorder(
                    img, 0, max(size_h - height, 0), 0, max(size_w - width, 0),
                    cv2.BORDER_CONSTANT, value=pad_value)
            star_h = _center_start(height, size_h)
            star_w = _center_start(width, size_w)
            tile = padded[star_h:star_h + size_h, star_w:star_w + size_w]
            _save_tile(path_out, name, star_h, star_w, tile)
            numberz += 1

        print('图片{}宽高为{}*{}'.format(name, size[1], size[0]))
        print('图片{}补充后宽高为{}*{}'.format(name, size0[1], size0[0]))
        print('图片{}切割成{}张'.format(name, number))
        print('图片{}切割中心成{}张'.format(name, numberz))
    print('共完成{}张图片'.format(count))
if __name__ == '__main__':
    # Source image directory of the dataset.
    ims_path = '/media/xuejunda/2c8076c4-abf2-4d0f-89e3-4568b4f029cf/dataset/detection/DOTA/train/images/images/'
    # Output directory for the cropped tiles.
    path = '/home/xuejunda/data/VOCdevkit/mydataset/train_all_1024img/'
    # step equal to the tile size: tiles do not overlap.
    caijian(path=ims_path, path_out=path, step=1024, size_h=1024, size_w=1024)
2.标签数据自动抓取
标签数据抓取规则:
1.位于裁剪影像上下左右四条边上的目标框(上图1、2、3、4):保留在包含该目标框1/2以上面积的那张裁剪影像上,并将目标框截断到影像边界
2.位于裁剪影像四个角上的目标框(上图5、6、7、8):保留在包含该目标框4/9以上面积(即宽、高方向各有2/3以上位于影像内)的那张裁剪影像上(如上图目标框6的放大示意图)
txttq.py代码如下:
# -*- coding: utf-8 -*-
import cv2
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
#category_set = ['backgroud','plane','small-vehicle','large-vehicle','ship']
#vehicle = ['large-vehicle','small-vehicle']
def _clip_hbb(pts, w, h, size_w, size_h):
    """Map one box from full-image to tile coordinates, clipping at the edges.

    *pts* is ``[(x1, y1), (x2, y2), (x3, y3), (x4, y4)]``; the tests below
    treat point 1 as the top-left and point 3 as the bottom-right corner.
    A box crossing one tile edge is kept when it sticks out by less than
    half its extent; a box crossing a corner is kept when it sticks out by
    less than a third on both axes. Clipped coordinates are set to 1
    (left/top) or ``size - 1`` (right/bottom).

    Returns the eight tile-relative coordinates, or None when the box does
    not belong to this tile.
    """
    (x1, y1), (x2, y2), (x3, y3), (x4, y4) = pts
    box_w = max(x1, x2, x3, x4) - min(x1, x2, x3, x4)
    box_h = max(y1, y2, y3, y4) - min(y1, y2, y3, y4)
    half_w, third_w = int(box_w / 2.0), int(box_w / 3.0)
    half_h, third_h = int(box_h / 2.0), int(box_h / 3.0)
    right, bottom = w + size_w, h + size_h

    # Coarse gate: the box must at least overlap the tile enlarged by half
    # of the box extent.
    if not (w - half_w < x1 <= right and w < x2 <= right + half_w
            and h - half_h < y1 <= bottom and h < y3 <= bottom + half_h):
        return None

    x1_in = w < x1 <= right
    x2_in = w < x2 <= right
    y1_in = h < y1 <= bottom
    y3_in = h < y3 <= bottom

    # Entirely inside the tile.
    if (x1_in and x2_in and w < x3 <= right and w < x4 <= right
            and y1_in and h < y2 <= bottom and y3_in and h < y4 <= bottom):
        return (x1 - w, y1 - h, x2 - w, y2 - h, x3 - w, y3 - h, x4 - w, y4 - h)

    # Sticks out on the left.
    if w - half_w < x1 <= w and x2_in:
        if y1_in and y3_in:
            return (1, y1 - h, x2 - w, y2 - h, x3 - w, y3 - h, 1, y4 - h)
        if w - third_w < x1 <= w:
            # Top-left corner.
            if h - third_h < y1 <= h and y3_in:
                return (1, 1, x2 - w, 1, x3 - w, y3 - h, 1, y4 - h)
            # Bottom-left corner.
            if y1_in and bottom < y3 <= bottom + third_h:
                return (1, y1 - h, x2 - w, y2 - h,
                        x3 - w, size_h - 1, 1, size_h - 1)

    # Sticks out at the top.
    if h - half_h < y1 <= h and y3_in:
        if x1_in and x2_in:
            return (x1 - w, 1, x2 - w, 1, x3 - w, y3 - h, x4 - w, y4 - h)
        # Top-right corner.
        if (x1_in and right < x2 <= right + third_w
                and h - third_h < y1 <= h):
            return (x1 - w, 1, size_w - 1, 1,
                    size_w - 1, y3 - h, x4 - w, y4 - h)

    # Sticks out on the right.
    if x1_in and right < x2 <= right + half_w:
        if y1_in and y3_in:
            return (x1 - w, y1 - h, size_w - 1, y2 - h,
                    size_w - 1, y3 - h, x4 - w, y4 - h)
        # Bottom-right corner.
        if (right < x2 <= right + third_w
                and bottom < y3 <= bottom + third_h and y1_in):
            return (x1 - w, y1 - h, size_w - 1, y2 - h,
                    size_w - 1, size_h - 1, x4 - w, size_h - 1)

    # Sticks out at the bottom.
    if bottom < y3 <= bottom + half_h and y1_in and x1_in and x2_in:
        return (x1 - w, y1 - h, x2 - w, y2 - h,
                x3 - w, size_h - 1, x4 - w, size_h - 1)

    return None


def tqtxt(path, path_txt, path_out, size_h=1024, size_w=1024):
    """Write a label file for every cropped tile found in *path*.

    Tile file names must look like ``<image>_<top>_<left>.<ext>``; files
    whose stem has fewer than three ``_``-separated parts are skipped. For
    each tile, ``<path_txt>/<image>.txt`` is read, its first two lines are
    copied unchanged, and every following box line
    (``x1 y1 x2 y2 x3 y3 x4 y4 label difficulty``) that belongs to the tile
    is appended to ``<path_out>/<tile stem>.txt`` in tile coordinates.

    Args:
        path: Directory with the cropped tile images (only names are read).
        path_txt: Directory with the full-image label files.
        path_out: Directory receiving the per-tile label files.
        size_h: Tile height in pixels.
        size_w: Tile width in pixels.
    """
    for im_list in os.listdir(path):
        name = im_list[:-4]
        name_list = name.split('_')
        # Indices 1 and 2 are both needed, so require three parts.
        if len(name_list) < 3:
            continue
        h = int(name_list[1])
        w = int(name_list[2])

        txtpath = os.path.join(path_txt, name_list[0] + '.txt')
        txt_outpath = os.path.join(path_out, name + '.txt')

        # Read the source labels first so that a missing label file does not
        # leave an empty output file behind.
        with open(txtpath, 'r') as f_in:
            lines = f_in.readlines()

        # NOTE(review): append mode is kept from the original; re-running on
        # the same output directory duplicates entries.
        with open(txt_outpath, 'a') as f:
            for i, line in enumerate(lines):
                if i < 2:
                    # The first two lines are copied through unchanged.
                    f.write(line)
                    continue
                splitline = line.split()
                if len(splitline) < 10:
                    # Blank or malformed line: nothing to convert.
                    continue
                label = splitline[8]
                kunnan = splitline[9]
                pts = [(int(float(splitline[k])), int(float(splitline[k + 1])))
                       for k in range(0, 8, 2)]
                clipped = _clip_hbb(pts, w, h, size_w, size_h)
                if clipped is None:
                    continue
                coords = ' '.join(str(float(v)) for v in clipped)
                # Terminate explicitly instead of relying on a newline
                # carried inside the last field.
                f.write('{} {} {}\n'.format(coords, label, kunnan))
if __name__ == '__main__':
    # Directory of the cropped tile images.
    ims_path = '/media/xuejunda/2c8076c4-abf2-4d0f-89e3-4568b4f029cf/dataset/detection/DOTA/train/images/img512/'
    # Directory of the full-image label files.
    txt_path = '/media/xuejunda/2c8076c4-abf2-4d0f-89e3-4568b4f029cf/dataset/detection/DOTA/train/hbb/'
    # Output directory for the per-tile label files.
    path = '/media/xuejunda/2c8076c4-abf2-4d0f-89e3-4568b4f029cf/dataset/detection/DOTA/train/txt512/'
    tqtxt(path=ims_path, path_txt=txt_path, path_out=path, size_w=512, size_h=512)
3.样本量少的类别数据扩充
3.1DOTAv1.5样本量统计结果:
plane: 8072.0
small-vehicle: 126501.0
large-vehicle: 22218.0
roundabout: 437.0
bridge: 2075.0
soccer-ball-field: 338.0
helicopter: 635.0
ground-track-field: 331.0
baseball-diamond: 412.0
storage-tank: 5346.0
tennis-court: 2425.0
swimming-pool: 2181.0
ship: 32973.0
harbor: 6016.0
basketball-court: 529.0
container-crane: 142.0
3.2挑选样本量小于1000的类别,进行数据扩充
这里是在裁剪之后的影像中进行挑选(只要影像的标签中包含任一小样本类别的目标,该影像及其标签文件即被选出),挑选代码如下:
# -*- coding: utf-8 -*-
import os
import shutil
from PIL import Image
import matplotlib.pyplot as plt
# Classes with fewer than 1000 samples; tiles containing any of them are
# selected for augmentation.
category_set = ['roundabout','soccer-ball-field','helicopter','ground-track-field','baseball-diamond','basketball-court','container-crane']


def _line_label(line):
    """Return the class label (9th field) of a label line, or None if absent."""
    fields = line.split()
    return fields[8] if len(fields) > 8 else None


def shaixuan(path_txt, path_txt_out, path_img, path_img_out):
    """Copy every tile that contains at least one object of a rare class.

    For each label file in *path_txt*, the first two lines are skipped and
    the remaining lines are scanned; when any label is listed in
    ``category_set``, the label file is copied to *path_txt_out* and the
    image ``<stem>.png`` is copied from *path_img* to *path_img_out*.

    Args:
        path_txt: Directory with the per-tile label files.
        path_txt_out: Directory receiving the selected label files.
        path_img: Directory with the tile images (``.png``).
        path_img_out: Directory receiving the selected images.
    """
    for im_list in os.listdir(path_txt):
        name = im_list[:-4]
        txtpath = os.path.join(path_txt, im_list)
        with open(txtpath, 'r') as f:
            lines = f.readlines()

        # Blank or short lines yield None and therefore never match.
        if not any(_line_label(line) in category_set for line in lines[2:]):
            continue

        shutil.copyfile(txtpath, os.path.join(path_txt_out, im_list))
        shutil.copyfile(os.path.join(path_img, name + '.png'),
                        os.path.join(path_img_out, name + '.png'))
if __name__ == '__main__':
    # Inputs: per-tile label files and the matching tile images.
    path_txt = '/home/xuejunda/data/VOCdevkit/mydataset/train_all_1024txt/'
    path_img = '/home/xuejunda/data/VOCdevkit/mydataset/train_all_1024img/'
    # Outputs: the selected label files and images.
    path_txt_out = '/home/xuejunda/data/VOCdevkit/mydataset/kuochongtxt/'
    path_img_out = '/home/xuejunda/data/VOCdevkit/mydataset/kuochongimg/'
    shaixuan(
        path_txt=path_txt,
        path_txt_out=path_txt_out,
        path_img=path_img,
        path_img_out=path_img_out,
    )
3.3数据扩充
利用图像旋转以及镜像的方式进行数据扩充,上图表明除原图外共可以额外产生7张不同的图像(分别为原图旋转90°、180°、270°得到的3张,以及原图任意一种镜像后的图像及其旋转90°、180°、270°得到的4张)。
数据扩充代码如下:
# -*- coding: utf-8 -*-
import cv2
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
def _read_label_file(txtpath):
    """Parse a label file into ``(header_lines, boxes)``.

    The first two lines are returned unchanged as the header. Every further
    line with at least ten whitespace-separated fields becomes
    ``(points, label, difficulty)`` where *points* is a list of four
    ``(x, y)`` integer pairs; shorter lines are skipped.
    """
    with open(txtpath, 'r') as f_in:
        lines = f_in.readlines()
    boxes = []
    for line in lines[2:]:
        fields = line.split()
        if len(fields) < 10:
            continue
        pts = [(int(float(fields[k])), int(float(fields[k + 1])))
               for k in range(0, 8, 2)]
        boxes.append((pts, fields[8], fields[9]))
    return lines[:2], boxes


def _append_label_file(txtpath, header, boxes, move, order):
    """Append *header* and the transformed *boxes* to *txtpath*.

    *move* maps one ``(x, y)`` point into the transformed image; *order*
    lists which original corner is written first, second, third and fourth.
    """
    # NOTE(review): append mode is kept from the original; re-running on the
    # same output directory duplicates entries.
    with open(txtpath, 'a') as f:
        f.writelines(header)
        for pts, label, kunnan in boxes:
            coords = []
            for k in order:
                coords.extend(move(*pts[k]))
            text = ' '.join(str(float(c)) for c in coords)
            f.write('{} {} {}\n'.format(text, label, kunnan))


def kuochong(img1, img_out, txt, txt_out, size_h=1024, size_w=1024):
    """Write the seven transpose/flip variants of every image and its labels.

    For each image in *img1*, the label file ``<txt>/<stem>.txt`` is read and
    seven variants are written: the transposed image (``_zz``), three flips
    of the original (``_zs180``, ``_zz90``, ``_zz270``) and three flips of
    the transposed image (``_zs90``, ``_zs270``, ``_zz180``). Images go to
    ``<img_out>/<stem><suffix>.png`` and labels to
    ``<txt_out>/<stem><suffix>.txt``, with box corners transformed and
    re-ordered to match.

    Args:
        img1: Directory with the source images.
        img_out: Directory receiving the augmented images.
        txt: Directory with the source label files.
        txt_out: Directory receiving the augmented label files.
        size_h: Image height used to mirror y coordinates (``size_h - y``).
        size_w: Image width used to mirror x coordinates (``size_w - x``).
    """
    for im_list in os.listdir(img1):
        name = im_list[:-4]
        img = cv2.imread(os.path.join(img1, im_list))
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            print('skip {}: cannot be read as an image'.format(im_list))
            continue
        # Parse the labels once instead of once per variant.
        header, boxes = _read_label_file(os.path.join(txt, name + '.txt'))

        img_zz = cv2.transpose(img)
        # (suffix, image, point mapping, corner order in the output line)
        variants = (
            ('_zz', img_zz,
             lambda x, y: (y, x), (0, 3, 2, 1)),
            ('_zs90', cv2.flip(img_zz, 1),
             lambda x, y: (size_h - y, x), (3, 0, 1, 2)),
            ('_zs180', cv2.flip(img, -1),
             lambda x, y: (size_w - x, size_h - y), (2, 3, 0, 1)),
            ('_zs270', cv2.flip(img_zz, 0),
             lambda x, y: (y, size_w - x), (1, 2, 3, 0)),
            ('_zz90', cv2.flip(img, 1),
             lambda x, y: (size_w - x, y), (1, 0, 3, 2)),
            ('_zz180', cv2.flip(img_zz, -1),
             lambda x, y: (size_h - y, size_w - x), (2, 1, 0, 3)),
            ('_zz270', cv2.flip(img, 0),
             lambda x, y: (x, size_h - y), (3, 2, 1, 0)),
        )
        for suffix, variant_img, move, order in variants:
            cv2.imwrite(os.path.join(img_out, name + suffix + '.png'),
                        variant_img)
            _append_label_file(os.path.join(txt_out, name + suffix + '.txt'),
                               header, boxes, move, order)
if __name__ == '__main__':
    # Inputs: the selected tile images and their label files.
    img1 = '/home/xuejunda/data/VOCdevkit/mydataset/kuochongimg/'
    txt = '/home/xuejunda/data/VOCdevkit/mydataset/kuochongtxt/'
    # Outputs: the augmented images and label files.
    img_out = '/home/xuejunda/data/VOCdevkit/mydataset/kuochonghouimg/'
    txt_out = '/home/xuejunda/data/VOCdevkit/mydataset/kuochonghoutxt/'
    kuochong(
        img1=img1,
        img_out=img_out,
        txt=txt,
        txt_out=txt_out,
        size_w=1024,
        size_h=1024,
    )
扩充后样本量:
plane: 9862.0
small-vehicle: 179726.0
large-vehicle: 30701.0
roundabout: 3768.0
bridge: 2619.0
soccer-ball-field: 2432.0
helicopter: 5040.0
ground-track-field: 2320.0
baseball-diamond: 3624.0
storage-tank: 6410.0
tennis-court: 7256.0
swimming-pool: 2932.0
ship: 39350.0
harbor: 6734.0
basketball-court: 4824.0
container-crane: 1208.0