影像裁剪及数据扩充(针对DOTAv1.5进行部分优化)

概述:不进行重叠裁剪(重叠裁剪会大量增加小汽车等小目标的数量,造成数量差距过大);影像中心裁剪一张(通常目标会出现在影像中间附近,保留在影像中心的较大目标不被裁剪)。

1.加影像中心裁剪的裁剪算法

caijian.py代码如下:

# -*- coding: utf-8 -*-
import cv2
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
def caijian(path,path_out,size_w=1024,size_h=1024,step=768):
    """Cut every image in ``path`` into fixed-size tiles plus one centre tile.

    Each image is padded on the right/bottom with grey (113, 113, 113) and
    then tiled on a regular grid. Tiles are written to ``path_out`` as
    ``<name>_<row offset>_<column offset>.png``. When the image is at least
    one tile high or one tile wide, an extra tile centred on the image is
    written as well (padding the short side if necessary).

    Args:
        path: Directory containing the source images.
        path_out: Directory that receives the tiles; created if missing.
        size_w: Tile width in pixels.
        size_h: Tile height in pixels.
        step: Grid stride in pixels; ``size - step`` is the tile overlap.
    """
    pad_value = (113, 113, 113)
    # cv2.imwrite reports a missing directory only through its return value,
    # so make sure the destination exists up front.
    os.makedirs(path_out, exist_ok=True)
    count = 0
    for im_list in os.listdir(path):
        number = 0
        numberz = 0
        name = os.path.splitext(im_list)[0]
        print(name)
        # Read from the ``path`` argument (the original used a global here).
        img = cv2.imread(os.path.join(path, im_list))
        if img is None:
            # cv2.imread yields None for anything it cannot decode.
            print('跳过无法读取的文件{}'.format(im_list))
            continue
        size = img.shape
        shao_w = size[1] % step
        shao_h = size[0] % step
        # Pad so that every grid tile lies completely inside the array.
        img0 = cv2.copyMakeBorder(img, 0, size_h - shao_h, 0, size_w - shao_w,
                                  cv2.BORDER_CONSTANT, value=pad_value)
        size0 = img0.shape
        count = count + 1
        # max(..., 1) guarantees at least one tile even when the image is
        # smaller than the overlap (size - step).
        for star_h in range(0, max(size[0] - (size_h - step), 1), step):
            for star_w in range(0, max(size[1] - (size_w - step), 1), step):
                cropped = img0[star_h:star_h + size_h, star_w:star_w + size_w]
                name_img = name + '_' + str(star_h) + '_' + str(star_w)
                cv2.imwrite(os.path.join(path_out, name_img + '.png'), cropped)
                number = number + 1
        # One additional tile centred on the image. Skipped when the image is
        # smaller than a tile in both directions.
        if size[0] >= size_h or size[1] >= size_w:
            pad_bottom = max(size_h - size[0], 0)
            pad_right = max(size_w - size[1], 0)
            if pad_bottom or pad_right:
                centre_src = cv2.copyMakeBorder(img, 0, pad_bottom, 0, pad_right,
                                                cv2.BORDER_CONSTANT, value=pad_value)
            else:
                centre_src = img
            # A padded axis starts at 0; otherwise centre the tile on that axis.
            star_h = 0 if pad_bottom else int(int(size[0] / 2) - size_h / 2)
            star_w = 0 if pad_right else int(int(size[1] / 2) - size_w / 2)
            cropped = centre_src[star_h:star_h + size_h, star_w:star_w + size_w]
            name_img = name + '_' + str(star_h) + '_' + str(star_w)
            cv2.imwrite(os.path.join(path_out, name_img + '.png'), cropped)
            numberz = numberz + 1
        print('图片{}宽高为{}*{}'.format(name,size[1],size[0]))
        print('图片{}补充后宽高为{}*{}'.format(name,size0[1],size0[0]))
        print('图片{}切割成{}张'.format(name,number))
        print('图片{}切割中心成{}张'.format(name,numberz))
        print('共完成{}张图片'.format(count))

if __name__ == '__main__':
    # Directory holding the original DOTA training images.
    ims_path = '/media/xuejunda/2c8076c4-abf2-4d0f-89e3-4568b4f029cf/dataset/detection/DOTA/train/images/images/'
    # Directory that receives the cropped tiles.
    path = '/home/xuejunda/data/VOCdevkit/mydataset/train_all_1024img/'
    caijian(
        ims_path,
        path,
        step=1024,
        size_h=1024,
        size_w=1024,
    )

2.标签数据自动抓取

标签数据抓取规则:

[示意图:目标框与裁剪影像边界相交的8种情形——1、2、3、4位于影像的四条边上,5、6、7、8位于影像的四个角上]

1.跨越影像上、下、左、右四条边的目标框(上图1、2、3、4):只保留在包含其1/2以上面积的那张影像中。
2.跨越影像四个角的目标框(上图5、6、7、8):只保留在包含其4/9以上面积的那张影像中(即宽、高方向各有2/3以上落在影像内,如上图目标框6的放大示意图)。
txttq.py代码如下:

# -*- coding: utf-8 -*-
import cv2
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
#category_set = ['backgroud','plane','small-vehicle','large-vehicle','ship']
#vehicle = ['large-vehicle','small-vehicle'] 
def tqtxt(path,path_txt,path_out,size_h=1024,size_w=1024):
    """Write one label file per cropped tile, keeping only boxes that belong to it.

    Tile files in ``path`` must be named ``<image>_<row offset>_<column offset>.<ext>``;
    files whose name has fewer than three ``_``-separated parts are skipped.
    The labels of the source image are read from ``<path_txt>/<image>.txt``.
    The first two lines are copied verbatim; every following line is parsed as
    ``x1 y1 x2 y2 x3 y3 x4 y4 label difficulty``.

    The code assumes the vertex order of a horizontal box: (x1, y1) top-left,
    (x2, y2) top-right, (x3, y3) bottom-right, (x4, y4) bottom-left.

    A box is kept when it is entirely inside the tile, when it crosses one
    edge by less than half of its width/height, or when it crosses a corner
    by less than a third in both directions. Kept boxes are shifted into tile
    coordinates and clipped to 1 / ``size - 1`` on the crossed side.

    Args:
        path: Directory containing the cropped tiles (only names are used).
        path_txt: Directory containing the label files of the source images.
        path_out: Directory that receives the per-tile label files.
        size_h: Tile height in pixels.
        size_w: Tile width in pixels.
    """
    def write_box(out, coords, label, kunnan):
        # Same layout as the source labels: eight floats, label, difficulty.
        out.write(' '.join(str(float(c)) for c in coords)
                  + ' {} {}\n'.format(label, kunnan))

    for im_list in os.listdir(path):
        name = os.path.splitext(im_list)[0]
        name_list = name.split('_')
        # Three parts are needed (image name, row offset, column offset);
        # the original "< 2" guard let two-part names crash on name_list[2].
        if len(name_list) < 3:
            continue
        h = int(name_list[1])
        w = int(name_list[2])
        right = w + size_w
        bottom = h + size_h
        # Read the labels before creating the output so a missing label file
        # does not leave an empty output behind.
        with open(os.path.join(path_txt, name_list[0] + '.txt'), 'r') as f_in:
            lines = f_in.readlines()
        # 'w' instead of 'a': re-running must not duplicate annotations.
        with open(os.path.join(path_out, name + '.txt'), 'w') as f:
            f.writelines(lines[:2])
            for line in lines[2:]:
                if not line.strip():
                    continue
                splitline = line.split(' ')
                label = splitline[8]
                kunnan = splitline[9].rstrip('\n')
                x1, y1, x2, y2, x3, y3, x4, y4 = (
                    int(float(v)) for v in splitline[:8])
                box_w = max(x1, x2, x3, x4) - min(x1, x2, x3, x4)
                box_h = max(y1, y2, y3, y4) - min(y1, y2, y3, y4)
                half_w = int(box_w / 2.0)
                half_h = int(box_h / 2.0)
                third_w = int(box_w / 3.0)
                third_h = int(box_h / 3.0)

                # Coarse filter: the box may stick out by at most half its size.
                if not (w - half_w < x1 <= right
                        and w < x2 <= right + half_w
                        and h - half_h < y1 <= bottom
                        and h < y3 <= bottom + half_h):
                    continue

                x1_in = w < x1 <= right
                x2_in = w < x2 <= right
                y1_in = h < y1 <= bottom
                y3_in = h < y3 <= bottom
                left_third = w - third_w < x1 <= w
                right_third = right < x2 <= right + third_w
                top_third = h - third_h < y1 <= h
                bottom_third = bottom < y3 <= bottom + third_h

                # Entirely inside the tile.
                if (x1_in and x2_in and w < x3 <= right and w < x4 <= right
                        and y1_in and h < y2 <= bottom and y3_in and h < y4 <= bottom):
                    write_box(f, (x1 - w, y1 - h, x2 - w, y2 - h,
                                  x3 - w, y3 - h, x4 - w, y4 - h), label, kunnan)
                # Crosses the left edge.
                if w - half_w < x1 <= w and x2_in:
                    # Vertically inside.
                    if y1_in and y3_in:
                        write_box(f, (1, y1 - h, x2 - w, y2 - h,
                                      x3 - w, y3 - h, 1, y4 - h), label, kunnan)
                    # Top-left corner: keep when at least 4/9 is inside.
                    if left_third and x2_in and top_third and y3_in:
                        write_box(f, (1, 1, x2 - w, 1,
                                      x3 - w, y3 - h, 1, y4 - h), label, kunnan)
                    # Bottom-left corner: keep when at least 4/9 is inside.
                    if left_third and x2_in and y1_in and bottom_third:
                        write_box(f, (1, y1 - h, x2 - w, y2 - h,
                                      x3 - w, size_h - 1, 1, size_h - 1), label, kunnan)
                # Crosses the top edge.
                if h - half_h < y1 <= h and y3_in:
                    # Horizontally inside.
                    if x1_in and x2_in:
                        write_box(f, (x1 - w, 1, x2 - w, 1,
                                      x3 - w, y3 - h, x4 - w, y4 - h), label, kunnan)
                    # Top-right corner.
                    if x1_in and right_third and top_third and y3_in:
                        write_box(f, (x1 - w, 1, size_w - 1, 1,
                                      size_w - 1, y3 - h, x4 - w, y4 - h), label, kunnan)
                # Crosses the right edge.
                if x1_in and right < x2 <= right + half_w:
                    # Vertically inside.
                    if y1_in and y3_in:
                        write_box(f, (x1 - w, y1 - h, size_w - 1, y2 - h,
                                      size_w - 1, y3 - h, x4 - w, y4 - h), label, kunnan)
                    # Bottom-right corner.
                    if x1_in and right_third and bottom_third and y1_in:
                        write_box(f, (x1 - w, y1 - h, size_w - 1, y2 - h,
                                      size_w - 1, size_h - 1, x4 - w, size_h - 1), label, kunnan)
                # Crosses the bottom edge.
                if bottom < y3 <= bottom + half_h and y1_in:
                    # Horizontally inside.
                    if x1_in and x2_in:
                        write_box(f, (x1 - w, y1 - h, x2 - w, y2 - h,
                                      x3 - w, size_h - 1, x4 - w, size_h - 1), label, kunnan)
    #print(category_set)

if __name__ == '__main__':
    # Directory with the cropped tiles whose names encode the crop offsets.
    tile_dir = '/media/xuejunda/2c8076c4-abf2-4d0f-89e3-4568b4f029cf/dataset/detection/DOTA/train/images/img512/'
    # Directory with the label files of the un-cropped images.
    label_dir = '/media/xuejunda/2c8076c4-abf2-4d0f-89e3-4568b4f029cf/dataset/detection/DOTA/train/hbb/'
    # Directory that receives the per-tile label files.
    label_out_dir = '/media/xuejunda/2c8076c4-abf2-4d0f-89e3-4568b4f029cf/dataset/detection/DOTA/train/txt512/'
    tqtxt(
        tile_dir,
        label_dir,
        label_out_dir,
        size_w=512,
        size_h=512,
    )

3.样本量少的类别数据扩充

3.1DOTAv1.5样本量统计结果:

plane: 8072.0
small-vehicle: 126501.0
large-vehicle: 22218.0
roundabout: 437.0
bridge: 2075.0
soccer-ball-field: 338.0
helicopter: 635.0
ground-track-field: 331.0
baseball-diamond: 412.0
storage-tank: 5346.0
tennis-court: 2425.0
swimming-pool: 2181.0
ship: 32973.0
harbor: 6016.0
basketball-court: 529.0
container-crane: 142.0

3.2挑选样本量小于1000的类别,进行数据扩充

我是从裁剪之后的影像中进行挑选的,挑选代码如下:

# -*- coding: utf-8 -*-
import os
import shutil
from PIL import Image
import matplotlib.pyplot as plt
# Classes with fewer than 1000 samples; tiles containing any of them are selected.
category_set = [
    'roundabout',
    'soccer-ball-field',
    'helicopter',
    'ground-track-field',
    'baseball-diamond',
    'basketball-court',
    'container-crane',
]


def shaixuan(path_txt,path_txt_out,path_img,path_img_out):
    """Copy every label/image pair that contains at least one rare-class box.

    For each label file in ``path_txt`` the first two lines are skipped and
    the remaining lines are scanned; as soon as a line's ninth field is in
    ``category_set`` the label file and the ``.png`` of the same name are
    copied to the output directories. All four arguments are used as string
    prefixes, so they must end with a path separator.
    """
    for label_file in os.listdir(path_txt):
        stem = label_file[:-4]
        src_txt = path_txt + label_file
        dst_txt = path_txt_out + label_file
        src_img = path_img + stem + '.png'
        dst_img = path_img_out + stem + '.png'

        with open(src_txt, 'r') as handle:
            records = handle.readlines()

        # records[:2] are the two header lines of the label file.
        for record in records[2:]:
            fields = record.split(' ')
            label, _difficulty = fields[8], fields[9]
            if label not in category_set:
                continue
            shutil.copyfile(src_txt, dst_txt)
            shutil.copyfile(src_img, dst_img)
            break

if __name__ == '__main__':
    # All four locations live under the same dataset root.
    dataset_root = '/home/xuejunda/data/VOCdevkit/mydataset/'
    shaixuan(
        dataset_root + 'train_all_1024txt/',
        dataset_root + 'kuochongtxt/',
        dataset_root + 'train_all_1024img/',
        dataset_root + 'kuochongimg/',
    )

3.3数据扩充

[示意图:原图经旋转与镜像后得到的8种不同朝向]
利用图像旋转以及镜像的方式进行数据扩充,上图表明共可以产生除原图外额外7张不同的图像(分别为原图的3次旋转以及任意一种镜像之后的4次旋转)。
数据扩充代码如下:

# -*- coding: utf-8 -*-
import cv2
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
def kuochong(img1,img_out,txt,txt_out,size_h=1024,size_w=1024):
    """Write seven transposed/flipped copies of every image together with its labels.

    For each image in ``img1`` the label file ``<txt>/<name>.txt`` is read
    once. Its first two lines are copied verbatim; every following line is
    parsed as ``x1 y1 x2 y2 x3 y3 x4 y4 label difficulty``. Seven variants are
    then written, each as ``<name><suffix>.png`` and ``<name><suffix>.txt``:

        _zz     transpose
        _zs90   transpose, then cv2.flip(..., 1)
        _zs180  cv2.flip(..., -1)
        _zs270  transpose, then cv2.flip(..., 0)
        _zz90   cv2.flip(..., 1)
        _zz180  transpose, then cv2.flip(..., -1)
        _zz270  cv2.flip(..., 0)

    Box coordinates are mapped with ``size_w`` / ``size_h`` rather than the
    actual image shape, so the images are assumed to be ``size_w`` x ``size_h``.

    Args:
        img1: Directory containing the images to augment.
        img_out: Directory that receives the augmented images; created if missing.
        txt: Directory containing the label files of the input images.
        txt_out: Directory that receives the augmented label files; created if missing.
        size_h: Image height used for mirroring the y coordinates.
        size_w: Image width used for mirroring the x coordinates.
    """
    # One row per variant:
    #   suffix, start from the transposed image?, cv2.flip code (None = no flip),
    #   mapping of a single vertex, order in which the mapped vertices are written.
    variants = (
        ('_zz',    True,  None, lambda x, y: (y, x),                   (0, 3, 2, 1)),
        ('_zs90',  True,  1,    lambda x, y: (size_h - y, x),          (3, 0, 1, 2)),
        ('_zs180', False, -1,   lambda x, y: (size_w - x, size_h - y), (2, 3, 0, 1)),
        ('_zs270', True,  0,    lambda x, y: (y, size_w - x),          (1, 2, 3, 0)),
        ('_zz90',  False, 1,    lambda x, y: (size_w - x, y),          (1, 0, 3, 2)),
        ('_zz180', True,  -1,   lambda x, y: (size_h - y, size_w - x), (2, 1, 0, 3)),
        ('_zz270', False, 0,    lambda x, y: (x, size_h - y),          (3, 2, 1, 0)),
    )

    # cv2.imwrite reports a missing directory only through its return value,
    # so make sure the destinations exist up front.
    os.makedirs(img_out, exist_ok=True)
    os.makedirs(txt_out, exist_ok=True)

    for im_list in os.listdir(img1):
        name = os.path.splitext(im_list)[0]
        img = cv2.imread(os.path.join(img1, im_list))
        if img is None:
            # cv2.imread yields None for anything it cannot decode.
            print('跳过无法读取的文件{}'.format(im_list))
            continue

        # Parse the labels once instead of once per variant.
        with open(os.path.join(txt, name + '.txt'), 'r') as f_in:
            lines = f_in.readlines()
        header = lines[:2]
        boxes = []
        for line in lines[2:]:
            if not line.strip():
                continue
            splitline = line.split(' ')
            label = splitline[8]
            kunnan = splitline[9].rstrip('\n')
            values = [int(float(v)) for v in splitline[:8]]
            points = [(values[k], values[k + 1]) for k in range(0, 8, 2)]
            boxes.append((points, label, kunnan))

        img_zz = cv2.transpose(img)
        for suffix, from_transposed, flip_code, move, order in variants:
            base = img_zz if from_transposed else img
            warped = base if flip_code is None else cv2.flip(base, flip_code)
            cv2.imwrite(os.path.join(img_out, name + suffix + '.png'), warped)
            # 'w' instead of 'a': re-running must not duplicate annotations.
            with open(os.path.join(txt_out, name + suffix + '.txt'), 'w') as f:
                f.writelines(header)
                for points, label, kunnan in boxes:
                    coords = []
                    for idx in order:
                        coords.extend(move(*points[idx]))
                    f.write(' '.join(str(float(c)) for c in coords)
                            + ' {} {}\n'.format(label, kunnan))
if __name__ == '__main__':
    # All four locations live under the same dataset root.
    dataset_root = '/home/xuejunda/data/VOCdevkit/mydataset/'
    kuochong(
        dataset_root + 'kuochongimg/',      # images selected for augmentation
        dataset_root + 'kuochonghouimg/',   # augmented images
        dataset_root + 'kuochongtxt/',      # labels of the selected images
        dataset_root + 'kuochonghoutxt/',   # augmented labels
        size_w=1024,
        size_h=1024,
    )

扩充后样本量:

plane: 9862.0
small-vehicle: 179726.0
large-vehicle: 30701.0
roundabout: 3768.0
bridge: 2619.0
soccer-ball-field: 2432.0
helicopter: 5040.0
ground-track-field: 2320.0
baseball-diamond: 3624.0
storage-tank: 6410.0
tennis-court: 7256.0
swimming-pool: 2932.0
ship: 39350.0
harbor: 6734.0
basketball-court: 4824.0
container-crane: 1208.0

  • 1
    点赞
  • 17
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值