【目标检测】一些数据集处理常用代码

暮已深

已于 2024-03-22 14:40:08 修改

阅读量1.5k

点赞数 3

分类专栏：目标检测文章标签： python 目标检测

于 2022-03-05 21:09:05 首次发布

本文链接：https://blog.csdn.net/gsgs1234/article/details/123299895

版权

目标检测专栏收录该内容

9 篇文章 1 订阅

订阅专栏

一些数据处理常用代码

在训练目标检测模型时，不同的数据集标签格式不一，不同模型要求的数据格式不一，最近一直在各种格式转换，用到的代码(大多数是找的别人的经试验能用的，部分是根据自己需求写的)在这儿总结一下。

文章目录

一些数据处理常用代码

1.标签数据格式转换

(1).txt转xml【DOTA->VOC】

DOTA数据集的标签文件为txt格式，内容为:
四个角点坐标（顺时针排列）
类别名
difficult：取值为 0 或 1，表示实例是否难以识别（0 表示不难，1 表示困难）

x1 y1 x2 y2 x3 y3 x4 y4 category difficult
在这里插入图片描述
DOTA标签转换为xml格式代码：

'''
DOTA数据集中标签为txt，将其转换为xml

类别数目：15
类别名称：plane, ship, storage tank, baseball diamond, tennis court, basketball court,
ground track field, harbor, bridge, large vehicle, small vehicle, helicopter, roundabout,
soccer ball field , swimming pool

'''
import os
import cv2
from xml.dom.minidom import Document
# import importlib,sys


# stdi, stdo, stde = sys.stdin, sys.stdout, sys.stderr
# importlib.reload(sys)
# sys.setdefaultencoding('utf-8')
# sys.stdin, sys.stdout, sys.stderr = stdi, stdo, stde


category_set = ['plane', 'baseball-diamond', 'bridge', 'ground-track-field',
                'small-vehicle', 'large-vehicle', 'ship', 'tennis-court',
                'basketball-court', 'storage-tank', 'soccer-ball-field',
                'roundabout', 'harbor', 'swimming-pool', 'helicopter']


def custombasename(fullname):
    """Return the file name of ``fullname`` without directory or last extension.

    The extension is stripped first (``os.path.splitext``) and the directory
    part second (``os.path.basename``).
    """
    stem, _extension = os.path.splitext(fullname)
    return os.path.basename(stem)


def limit_value(a, b):
    """Clamp ``a`` so that it is at least 1 and strictly below ``b``.

    The lower bound is applied first, then the upper bound; a value that is
    ``>= b`` after the first step becomes ``b - 1``.
    """
    clamped = 1 if a < 1 else a
    return b - 1 if clamped >= b else clamped


def readlabeltxt(txtpath, height, width, hbb=True):
    """Parse one DOTA label file into a list of boxes.

    Each annotation line is expected to hold
    ``x1 y1 x2 y2 x3 y3 x4 y4 category difficult``.  The first two lines of
    the file are skipped (DOTA header lines), as are lines with fewer than
    ten whitespace-separated fields (blank or truncated lines) and lines
    whose category is not in ``category_set``.

    Args:
        txtpath: path of the DOTA ``.txt`` label file.
        height: image height, used to clamp y coordinates via ``limit_value``.
        width: image width, used to clamp x coordinates via ``limit_value``.
        hbb: when true, return the axis-aligned bounding box of the four
            corners; otherwise return the four clamped corners.

    Returns:
        A list of ``[xmin, ymin, xmax, ymax, label, difficult]`` (``hbb``) or
        ``[x1, y1, x2, y2, x3, y3, x4, y4, label, difficult]`` entries.
        ``difficult`` is kept as the string read from the file.
    """
    print(txtpath)
    boxes = []
    with open(txtpath, 'r') as f_in:
        for i, line in enumerate(f_in):
            if i in (0, 1):  # the two DOTA header lines carry no boxes
                continue
            # split() tolerates repeated spaces/tabs and trailing newlines.
            fields = line.split()
            if len(fields) < 10:  # blank or malformed line
                continue

            label, kunnan = fields[8], fields[9]
            if label not in category_set:  # only keep the configured classes
                print(label)
                continue

            coords = [int(float(value)) for value in fields[:8]]
            xs, ys = coords[0::2], coords[1::2]
            if hbb:
                box = [limit_value(min(xs), width),
                       limit_value(min(ys), height),
                       limit_value(max(xs), width),
                       limit_value(max(ys), height),
                       label, kunnan]
            else:
                box = []
                for x, y in zip(xs, ys):
                    box.append(limit_value(x, width))
                    box.append(limit_value(y, height))
                box.extend([label, kunnan])
            boxes.append(box)
    return boxes


def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
    element = doc.createElement(tag)
    parent.appendChild(element)
    # createTextNode only accepts str, so every value is converted here.
    element.appendChild(doc.createTextNode(str(text)))
    return element


def writeXml(tmp, imgname, w, h, d, bboxes, hbb=True):
    """Write a VOC-style annotation xml for one image.

    Args:
        tmp: output directory; the file is named after ``imgname`` with the
            extension replaced by ``.xml``.
        imgname: image file name stored in ``<filename>``.
        w, h, d: image width, height and depth written under ``<size>``.
        bboxes: boxes whose last two items are the class name and the
            difficult flag.  The leading items are ``xmin, ymin, xmax, ymax``
            when ``hbb`` is true, otherwise eight corner coordinates written
            as ``x0, y0, ..., x3, y3``.
        hbb: selects the horizontal (true) or oriented (false) box layout.

    Returns:
        None.  The document is written UTF-8 encoded, tab-indented.
    """
    doc = Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    _append_text_element(doc, annotation, 'folder', "VOC2007")
    _append_text_element(doc, annotation, 'filename', imgname)

    source = doc.createElement('source')
    annotation.appendChild(source)
    _append_text_element(doc, source, 'database', "My Database")
    _append_text_element(doc, source, 'annotation', "VOC2007")
    _append_text_element(doc, source, 'image', "flickr")

    owner = doc.createElement('owner')
    annotation.appendChild(owner)
    _append_text_element(doc, owner, 'flickrid', "NULL")
    _append_text_element(doc, owner, 'name', "idannel")

    size = doc.createElement('size')
    annotation.appendChild(size)
    _append_text_element(doc, size, 'width', w)
    _append_text_element(doc, size, 'height', h)
    _append_text_element(doc, size, 'depth', d)

    _append_text_element(doc, annotation, 'segmented', "0")

    if hbb:
        coord_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    else:
        coord_tags = ('x0', 'y0', 'x1', 'y1', 'x2', 'y2', 'x3', 'y3')

    for bbox in bboxes:
        object_new = doc.createElement("object")
        annotation.appendChild(object_new)

        _append_text_element(doc, object_new, 'name', bbox[-2])
        _append_text_element(doc, object_new, 'pose', "Unspecified")
        _append_text_element(doc, object_new, 'truncated', "0")
        # The difficult flag may arrive as int or str; both are accepted.
        _append_text_element(doc, object_new, 'difficult', bbox[-1])

        bndbox = doc.createElement('bndbox')
        object_new.appendChild(bndbox)
        # zip stops after the coordinate tags, so label/difficult are skipped.
        for tag, value in zip(coord_tags, bbox):
            _append_text_element(doc, bndbox, tag, value)

    xmlname = os.path.splitext(imgname)[0]
    tempfile = os.path.join(tmp, xmlname + '.xml')
    with open(tempfile, 'wb') as f:
        f.write(doc.toprettyxml(indent='\t', encoding='utf-8'))
    return


if __name__ == '__main__':
    # Convert every DOTA label file into a VOC xml (horizontal boxes).
    data_path = '/home/DOTA/train2017'
    images_path = data_path  # directory holding the sample images
    labeltxt_path = '/home/DOTA/labeltxt/'  # directory holding the DOTA txt labels
    anno_new_path = '/home/DOTA/Annotations_xml/'  # output directory for the xml files
    ext = '.png'  # extension of the sample images

    for filename in os.listdir(labeltxt_path):
        filepath = os.path.join(labeltxt_path, filename)
        picname = os.path.splitext(filename)[0] + ext
        pic_path = os.path.join(images_path, picname)

        # The image is only read to obtain H, W, D for the xml <size> node.
        im = cv2.imread(pic_path)
        if im is None:
            # cv2.imread returns None instead of raising when the file is
            # missing or unreadable; skip it rather than crash on im.shape.
            print('无法读取图片', pic_path)
            continue
        (H, W, D) = im.shape

        boxes = readlabeltxt(filepath, H, W, hbb=True)
        if len(boxes) == 0:
            print('文件为空', filepath)

        writeXml(anno_new_path, picname, W, H, D, boxes, hbb=True)
        print('正在处理%s' % filename)

(2).标签转为json格式【NWPU/DIOR->COCO】

将 NWPU、DIOR 等数据集的标签转换为 COCO 格式 json 的代码如下：

import os
import cv2
import json
import argparse
from tqdm import tqdm
import xml.etree.ElementTree as ET

# Key names of the COCO top-level dict and of its image / annotation /
# category records; the converter functions index into these lists.
COCO_DICT=['images','annotations','categories']
IMAGES_DICT=['file_name','height','width','id']

ANNOTATIONS_DICT=['image_id','iscrowd','area','bbox','category_id','id']

CATEGORIES_DICT=['id','name']
## {'supercategory': 'person', 'id': 1, 'name': 'person'}
## {'supercategory': 'vehicle', 'id': 2, 'name': 'bicycle'}

# Class-name lists per dataset; list order defines the category ids.
YOLO_CATEGORIES=['person']
RSOD_CATEGORIES=['aircraft','playground','overpass','oiltank']
NWPU_CATEGORIES=['airplane','ship','storage tank','baseball diamond','tennis court',
                 'basketball court','ground track field','harbor','bridge','vehicle']

# Continuation happens inside the brackets; a backslash followed by more text
# on the same line is a syntax error.
VOC_CATEGORIES=['aeroplane','bicycle','bird','boat','bottle','bus','car','cat','chair','cow',
                'diningtable','dog','horse','motorbike','person','pottedplant','sheep','sofa','train','tvmonitor']

DIOR_CATEGORIES=['golffield','Expressway-toll-station','vehicle','trainstation','chimney','storagetank',
                 'ship','harbor','airplane','groundtrackfield','tenniscourt','dam','basketballcourt',
                 'Expressway-Service-area','stadium','airport','baseballfield','bridge','windmill','overpass']

# Command-line options of the converter.  Directory arguments are expected to
# end with a path separator.
parser=argparse.ArgumentParser(description='2COCO')
# DIOR example: r'T:/shujuji/DIOR/JPEGImages-trainval/'
parser.add_argument('--image_path',type=str,default=r'G:/postgraduate1-1/EfficientdetPaper/paper/xiu/NWPU VHR-10 dataset/positive image set/',help='directory containing the images')
# DIOR example: r'T:/shujuji/DIOR/Annotations/'
parser.add_argument('--annotation_path',type=str,default=r'G:/postgraduate1-1/EfficientdetPaper/paper/xiu/NWPU VHR-10 dataset/ground truth/',help='directory containing the annotation files')
# Restricting the choices reports a typo as a usage error instead of a
# NameError later in the script.
parser.add_argument('--dataset',type=str,default='NWPU',choices=['RSOD','NWPU','DIOR','YOLO'],help='source dataset format')
parser.add_argument('--save',type=str,default='G:/postgraduate1-1/EfficientdetPaper/paper/xiu/NWPU VHR-10 dataset/train.json',help='path of the COCO json file to write')
args=parser.parse_args()
def load_json(path):
    """Load a COCO-style json file, print a short summary and return it.

    Prints every top-level key followed by the ``'annotations'`` entry.

    Args:
        path: path of the json file.

    Returns:
        The decoded json object.

    Raises:
        KeyError: if the file has no ``'annotations'`` key.
    """
    with open(path,'r') as f:
        json_dict=json.load(f)
    for key in json_dict:
        print(key)
    print(json_dict['annotations'])
    return json_dict
def save_json(dict,path):
    """Serialize ``dict`` as json into ``path``, printing progress messages.

    The first parameter keeps its original name (it shadows the builtin) so
    that keyword callers keep working.
    """
    print('SAVE_JSON...')
    with open(path, mode='w') as out_file:
        json.dump(dict, fp=out_file)
    print('SUCCESSFUL_SAVE_JSON:', path)
def load_image(path):
    """Return ``(height, width)`` of the image stored at ``path``.

    Raises:
        OSError: if OpenCV cannot read the file (``cv2.imread`` returns
            ``None`` rather than raising for missing or unsupported files).
    """
    img=cv2.imread(path)
    if img is None:
        raise OSError('cannot read image: {}'.format(path))
    height, width = img.shape[0], img.shape[1]
    return height, width
def generate_categories_dict(category):
    """Build the COCO ``categories`` list from a list of class names.

    Ids are 1-based and follow the order of ``category``; the record keys
    come from ``CATEGORIES_DICT`` (``['id', 'name']``).
    """
    print('GENERATE_CATEGORIES_DICT...')
    id_key, name_key = CATEGORIES_DICT[0], CATEGORIES_DICT[1]
    # enumerate replaces the per-item list.index() lookup of the position.
    return [{id_key: position, name_key: name}
            for position, name in enumerate(category, start=1)]
def generate_images_dict(imagelist,image_path,start_image_id=11725):
    """Build the COCO ``images`` list for the given image file names.

    Args:
        imagelist: image file names inside ``image_path``.
        image_path: directory containing the images.
        start_image_id: id assigned to the first image; later images get
            consecutive ids in list order.

    Returns:
        A list of records keyed by ``IMAGES_DICT``
        (``file_name``, ``height``, ``width``, ``id``).
    """
    print('GENERATE_IMAGES_DICT...')
    images_dict=[]
    with tqdm(total=len(imagelist)) as load_bar:
        for offset, file_name in enumerate(imagelist):
            # Read each image once; both dimensions come from the same call.
            height, width = load_image(os.path.join(image_path, file_name))
            images_dict.append({
                IMAGES_DICT[0]: file_name,
                IMAGES_DICT[1]: height,
                IMAGES_DICT[2]: width,
                IMAGES_DICT[3]: offset + start_image_id,
            })
            load_bar.update(1)
    return images_dict
def DIOR_Dataset(image_path,annotation_path,start_image_id=11725,start_id=0):
    """Convert DIOR (VOC-style xml) annotations into a COCO dict.

    For every image in ``image_path`` the xml with the same stem is read from
    ``annotation_path`` and each ``<object>`` becomes one annotation with a
    ``[x_min, y_min, width, height]`` box.

    Args:
        image_path: directory containing the images.
        annotation_path: directory containing the xml files.
        start_image_id: id of the first image.
        start_id: id of the first annotation.

    Returns:
        ``{'images': [...], 'annotations': [...], 'categories': [...]}``.
    """
    categories_dict=generate_categories_dict(DIOR_CATEGORIES)  # category ids start at 1
    imgname=os.listdir(image_path)
    images_dict=generate_images_dict(imgname,image_path,start_image_id)
    print('GENERATE_ANNOTATIONS_DICT...')
    annotations_dict=[]
    annotation_id=start_id
    for image in images_dict:
        image_id=image['id']
        print(image_id)
        # splitext keeps stems that themselves contain dots intact.
        stem=os.path.splitext(image['file_name'])[0]
        root=ET.parse(os.path.join(annotation_path, stem+'.xml')).getroot()
        for obj in root.findall('object'):
            # +1 so the id matches the 1-based ids of generate_categories_dict.
            category_id=DIOR_CATEGORIES.index(obj.find('name').text)+1
            bndbox=obj.find('bndbox')
            x_min=float(bndbox.find('xmin').text)
            y_min=float(bndbox.find('ymin').text)
            w=float(bndbox.find('xmax').text)-x_min
            h=float(bndbox.find('ymax').text)-y_min
            annotations_dict.append({
                'image_id':image_id,'iscrowd':0,'area':w*h,'bbox':[x_min,y_min,w,h],
                'category_id':category_id,'id':annotation_id,
            })
            annotation_id+=1
    print('SUCCESSFUL_GENERATE_DIOR_JSON')
    return {COCO_DICT[0]:images_dict,COCO_DICT[1]:annotations_dict,COCO_DICT[2]:categories_dict}
def NWPU_Dataset(image_path,annotation_path,start_image_id=0,start_id=0):
    """Convert NWPU VHR-10 txt annotations into a COCO dict.

    Each non-blank annotation line is parsed as ``(x1,y1),(x2,y2),class_id``
    where ``class_id`` is already 1-based.

    Args:
        image_path: directory containing the images.
        annotation_path: directory containing one txt file per image stem.
        start_image_id: id of the first image.
        start_id: id of the first annotation.

    Returns:
        ``{'images': [...], 'annotations': [...], 'categories': [...]}``.
    """
    categories_dict=generate_categories_dict(NWPU_CATEGORIES)
    imgname=os.listdir(image_path)
    images_dict=generate_images_dict(imgname,image_path,start_image_id)
    print('GENERATE_ANNOTATIONS_DICT...')
    annotations_dict=[]
    annotation_id=start_id
    for image in images_dict:
        image_id=image['id']
        stem=os.path.splitext(image['file_name'])[0]
        annotation_txt=os.path.join(annotation_path, stem+'.txt')
        with open(annotation_txt,'r') as txt:
            for line in txt:
                if not line.strip():  # blank or whitespace-only line
                    continue
                fields=line.split(',')
                category_id=int(fields[4])
                category=NWPU_CATEGORIES[category_id-1]
                print(category_id,'        ',category)
                x_min=float(fields[0].split('(')[1])
                y_min=float(fields[1].split(')')[0])
                w=float(fields[2].split('(')[1])-x_min
                h=float(fields[3].split(')')[0])-y_min
                annotations_dict.append({
                    'image_id':image_id,'iscrowd':0,'area':w*h,'bbox':[x_min,y_min,w,h],
                    'category_id':category_id,'id':annotation_id,
                })
                annotation_id+=1
    print('SUCCESSFUL_GENERATE_NWPU_JSON')
    return {COCO_DICT[0]:images_dict,COCO_DICT[1]:annotations_dict,COCO_DICT[2]:categories_dict}

def YOLO_Dataset(image_path,annotation_path,start_image_id=0,start_id=0):
    """Convert YOLO txt annotations into a COCO dict.

    Each non-blank line is parsed as ``class cx cy w h`` with the box given
    as fractions of the image size; it is converted to an absolute
    ``[x_min, y_min, width, height]`` box.  The 0-based class index is
    shifted by one to match the 1-based category ids.

    Args:
        image_path: directory containing the images.
        annotation_path: directory containing one txt file per image stem.
        start_image_id: id of the first image.
        start_id: id of the first annotation.

    Returns:
        ``{'images': [...], 'annotations': [...], 'categories': [...]}``.
    """
    categories_dict=generate_categories_dict(YOLO_CATEGORIES)
    imgname=os.listdir(image_path)
    # Forward start_image_id; otherwise the helper's own default is used.
    images_dict=generate_images_dict(imgname,image_path,start_image_id)
    print('GENERATE_ANNOTATIONS_DICT...')
    annotations_dict=[]
    annotation_id=start_id
    for image in images_dict:
        image_id=image['id']
        W,H=image['width'],image['height']
        stem=os.path.splitext(image['file_name'])[0]
        annotation_txt=os.path.join(annotation_path, stem+'.txt')
        with open(annotation_txt,'r') as txt:
            for line in txt:
                fields=line.split()
                if not fields:  # blank line
                    continue
                category_id=int(fields[0])+1
                x,y,w,h=(float(value) for value in fields[1:5])
                x_min=(x-w/2)*W
                y_min=(y-h/2)*H
                w=w*W
                h=h*H
                annotations_dict.append({
                    'image_id':image_id,'iscrowd':0,'area':w*h,'bbox':[x_min,y_min,w,h],
                    'category_id':category_id,'id':annotation_id,
                })
                annotation_id+=1
    print('SUCCESSFUL_GENERATE_YOLO_JSON')
    return {COCO_DICT[0]:images_dict,COCO_DICT[1]:annotations_dict,COCO_DICT[2]:categories_dict}
def RSOD_Dataset(image_path,annotation_path,start_image_id=0,start_id=0):
    """Convert RSOD tab-separated annotations into a COCO dict.

    Each non-blank line is split on tabs; field 1 is the class name and
    fields 2-5 are ``xmin, ymin, xmax, ymax`` (field 0 is not used here).

    Args:
        image_path: directory containing the images.
        annotation_path: directory containing one txt file per image stem.
        start_image_id: id of the first image.
        start_id: id of the first annotation.

    Returns:
        ``{'images': [...], 'annotations': [...], 'categories': [...]}``.
    """
    categories_dict=generate_categories_dict(RSOD_CATEGORIES)
    imgname=os.listdir(image_path)
    images_dict=generate_images_dict(imgname,image_path,start_image_id)
    print('GENERATE_ANNOTATIONS_DICT...')
    annotations_dict=[]
    annotation_id=start_id
    for image in images_dict:
        image_id=image['id']
        stem=os.path.splitext(image['file_name'])[0]
        annotation_txt=os.path.join(annotation_path, stem+'.txt')
        with open(annotation_txt,'r') as txt:
            for line in txt:
                if not line.strip():  # blank line
                    continue
                fields=line.split('\t')
                category_id=RSOD_CATEGORIES.index(fields[1])+1
                x_min=float(fields[2])
                y_min=float(fields[3])
                w=float(fields[4])-x_min
                h=float(fields[5])-y_min
                # 'area' is included like in the NWPU/YOLO converters.
                annotations_dict.append({
                    'image_id':image_id,'iscrowd':0,'area':w*h,'bbox':[x_min,y_min,w,h],
                    'category_id':category_id,'id':annotation_id,
                })
                annotation_id+=1
    print('SUCCESSFUL_GENERATE_RSOD_JSON')

    return {COCO_DICT[0]:images_dict,COCO_DICT[1]:annotations_dict,COCO_DICT[2]:categories_dict}
if __name__=='__main__':
    dataset=args.dataset  # dataset name
    save=args.save  # output path of the json file
    image_path=args.image_path  # image directory
    annotation_path=args.annotation_path  # annotation directory

    # Converter and first image id per supported dataset.
    converters={
        'RSOD':(RSOD_Dataset,0),
        'NWPU':(NWPU_Dataset,0),
        'DIOR':(DIOR_Dataset,11725),
        'YOLO':(YOLO_Dataset,0),
    }
    if dataset not in converters:
        # Without this check json_dict would be undefined below.
        raise SystemExit('unsupported dataset: {}'.format(dataset))
    convert,first_image_id=converters[dataset]
    json_dict=convert(image_path,annotation_path,first_image_id)
    save_json(json_dict,save)

2. 影像及标签裁剪

(1).NWPU VHR

将NWPU VHR数据集影像及其标签裁剪为指定大小

import cv2
import os


#  Image narrower than the crop width: pad it up to the crop width
def fill_right(img, size_w):
    """Return ``img`` padded on the right to ``size_w`` columns.

    The padding uses the constant colour (107, 113, 115), described by the
    original author as the dataset mean.
    """
    missing_cols = size_w - img.shape[1]
    return cv2.copyMakeBorder(img, 0, 0, 0, missing_cols,
                              cv2.BORDER_CONSTANT, value=(107, 113, 115))


#  Image shorter than the crop height: pad it up to the crop height
def fill_bottom(img, size_h):
    """Return ``img`` padded at the bottom to ``size_h`` rows.

    The padding uses the constant colour (107, 113, 115).
    """
    missing_rows = size_h - img.shape[0]
    return cv2.copyMakeBorder(img, 0, missing_rows, 0, 0,
                              cv2.BORDER_CONSTANT, value=(107, 113, 115))


#  Image smaller than the crop in both directions: pad width and height
def fill_right_bottom(img, size_w, size_h):
    """Return ``img`` padded right and bottom to ``size_h`` x ``size_w``.

    The padding uses the constant colour (107, 113, 115).
    """
    rows, cols = img.shape[0], img.shape[1]
    return cv2.copyMakeBorder(img, 0, size_h - rows, 0, size_w - cols,
                              cv2.BORDER_CONSTANT, value=(107, 113, 115))


#  Image tiling
#  img_floder      folder with the source images
#  out_img_floder  folder receiving the tiles
#  size_w          tile width
#  size_h          tile height
#  step            stride between tile origins
def _tile_starts(length, tile, step):
    """Return the distinct tile origins along one axis of size ``length``.

    Origins advance by ``step``; an origin whose tile would run past the end
    is pulled back to ``length - tile``.  Repeated clamped origins are
    reported once.  An axis not longer than the tile yields ``[0]``.
    """
    if length <= tile:
        return [0]
    last_start = length - tile
    starts = []
    for position in range(0, length - 1, step):
        start = min(position, last_start)
        if not starts or starts[-1] != start:
            starts.append(start)
    return starts


def image_split(img_floder, out_img_floder, size_w, size_h, step):
    """Cut every image of ``img_floder`` into ``size_h`` x ``size_w`` tiles.

    Images smaller than the tile in one or both directions are padded first
    (``fill_right`` / ``fill_bottom`` / ``fill_right_bottom``).  Each tile is
    saved as ``<stem>_<start_h>_<start_w>.jpg`` in ``out_img_floder`` so the
    label-splitting step can recover the tile origin from the file name.
    Files OpenCV cannot read are reported and skipped.
    """
    print("进行图像的裁剪--------------------------------")
    count = 0
    for img_name in os.listdir(img_floder):
        # splitext handles extensions of any length.
        name = os.path.splitext(img_name)[0]
        img = cv2.imread(os.path.join(img_floder, img_name))
        if img is None:  # not an image, or unreadable
            print('无法读取图片{}'.format(img_name))
            continue

        height, width = img.shape[0], img.shape[1]
        if height < size_h and width < size_w:
            print('图片{}需要在下面和右面补齐'.format(name))
            img = fill_right_bottom(img, size_w, size_h)
        elif width < size_w:
            print('图片{}需要在右面补齐'.format(name))
            img = fill_right(img, size_w)
        elif height < size_h:
            print('图片{}需要在下面补齐'.format(name))
            img = fill_bottom(img, size_h)

        number = 0
        # Origins are computed from the unpadded size, as padding only
        # happens on axes that need a single tile at origin 0.
        for start_h in _tile_starts(height, size_h, step):
            for start_w in _tile_starts(width, size_w, step):
                cropped = img[start_h: start_h + size_h, start_w: start_w + size_w]
                name_img = name + '_' + str(start_h) + '_' + str(start_w)
                cv2.imwrite(os.path.join(out_img_floder, name_img + '.jpg'), cropped)
                number = number + 1
        count = count + 1
        print('{}.jpg切割成{}张.'.format(name, number))
    print('共完成{}张图片'.format(count))


#  Label splitting
#  out_img_floder  folder with the tiles written by image_split
#  txt_floder      folder with the original txt labels
#  out_txt_floder  folder receiving the per-tile txt labels
#  size_h          tile height
#  size_w          tile width
def txt_split(out_img_floder, txt_floder, out_txt_floder, size_h, size_w):
    """Write one label file per tile, keeping boxes that lie inside the tile.

    Tile files are expected to be named ``<stem>_<start_h>_<start_w>.<ext>``;
    other files are ignored.  Label lines are parsed as
    ``(x1,y1),(x2,y2),label``.  A box is kept when both corners fall within
    the tile (bounds inclusive) and is written shifted to tile coordinates.
    Each output file is rewritten from scratch, so rerunning does not
    duplicate labels.
    """
    print("进行标签文件的裁剪----------------------------")
    for img_name in os.listdir(out_img_floder):
        name = os.path.splitext(img_name)[0]
        # rsplit keeps stems that themselves contain underscores intact.
        parts = name.rsplit('_', 2)
        if len(parts) != 3:
            continue
        txt_name = parts[0]
        h = int(parts[1])
        w = int(parts[2])
        txtpath = os.path.join(txt_floder, txt_name + '.txt')
        out_txt_path = os.path.join(out_txt_floder, name + '.txt')
        with open(txtpath, 'r') as f_in, open(out_txt_path, 'w') as f_out:
            for line in f_in:
                splitline = line.strip().split(',')
                if len(splitline) < 5:  # blank or malformed line
                    continue
                label = splitline[4].strip()
                x1 = int(splitline[0].split('(')[1])
                y1 = int(splitline[1].split(')')[0])
                x2 = int(splitline[2].split('(')[1])
                y2 = int(splitline[3].split(')')[0])

                inside_x = w <= x1 <= w + size_w and w <= x2 <= w + size_w
                inside_y = h <= y1 <= h + size_h and h <= y2 <= h + size_h
                if inside_x and inside_y:
                    # The newline is written explicitly instead of relying on
                    # the one carried over from the input line.
                    f_out.write('({},{}),({},{}),{}\n'.format(
                        x1 - w, y1 - h, x2 - w, y2 - h, label))
        print('{}.txt切割完成.'.format(name))



#  Folder with the source images
img_floder = r'G:/NWPU VHR-10 dataset/img/'
#  Folder receiving the image tiles
out_img_floder = r'G:/NWPU VHR-10 dataset/cut/img/'
#  Folder with the original txt labels
txt_floder = r'G:/NWPU VHR-10 dataset/ground truth/'
#  Folder receiving the label files of the tiles
out_txt_floder = r'G:/NWPU VHR-10 dataset/cut/label/'
#  Tile width
size_w = 500
#  Tile height
size_h = 500
#  Stride between tiles; neighbouring tiles overlap by size_w - step
step = 400

# Cut the images first: txt_split lists the tile folder to find the tile names.
image_split(img_floder, out_img_floder, size_w, size_h, step)
txt_split(out_img_floder, txt_floder, out_txt_floder, size_h, size_w)

(2).DOTA

DOTA影像裁剪的代码是一样的，标签裁剪的代码为：

# -*- coding: utf-8 -*-
'''
对裁剪后的影像中的标签实现自动抓取
'''
import cv2
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

def tqtxt(path,path_txt,path_out,size_h,size_w):
    """Write one DOTA label file per tile found in ``path``.

    Tile files are expected to be named ``<stem>_<start_h>_<start_w>.<ext>``;
    files without that pattern are skipped.  The first two lines of the
    source label file are copied unchanged.  Every other line is parsed as
    ``x1 y1 x2 y2 x3 y3 x4 y4 label difficult``; a box is kept when all four
    corners fall within the tile (bounds inclusive) and is written with its
    coordinates shifted to the tile origin.  Each output file is rewritten
    from scratch, so rerunning does not duplicate labels.

    Args:
        path: folder with the image tiles.
        path_txt: folder with the original label files.
        path_out: folder receiving the per-tile label files.
        size_h: tile height.
        size_w: tile width.
    """
    for im_name in os.listdir(path):
        name = os.path.splitext(im_name)[0]
        # rsplit keeps stems that themselves contain underscores intact.
        name_list = name.rsplit('_', 2)
        if len(name_list) < 3:  # both offsets are required below
            continue
        h = int(name_list[1])
        w = int(name_list[2])
        txtpath = os.path.join(path_txt, name_list[0] + '.txt')
        txt_outpath = os.path.join(path_out, name + '.txt')
        with open(txtpath, 'r') as f_in, open(txt_outpath, 'w') as f_out:
            for i, line in enumerate(f_in):
                if i in (0, 1):
                    f_out.write(line)  # header lines are copied verbatim
                    continue
                fields = line.split()
                if len(fields) < 10:  # blank or malformed line
                    continue
                label, kunnan = fields[8], fields[9]
                coords = [int(float(value)) for value in fields[:8]]
                xs, ys = coords[0::2], coords[1::2]

                inside_x = all(w <= x <= w + size_w for x in xs)
                inside_y = all(h <= y <= h + size_h for y in ys)
                if inside_x and inside_y:
                    shifted = []
                    for x, y in zip(xs, ys):
                        shifted.append(str(float(x - w)))
                        shifted.append(str(float(y - h)))
                    # The newline is written explicitly instead of relying on
                    # the one carried by the last input field.
                    f_out.write('{} {} {}\n'.format(' '.join(shifted), label, kunnan))

if __name__ == '__main__':
    # Directory arguments end with '/'.
    ims_path='/home/DOTA/img/'# folder with the image tiles
    txt_path = '/home/DOTA/labeltxt/'# folder with the original label files
    path = '/home/DOTA/selected_txt/'# folder receiving the label files of the tiles
    tqtxt(ims_path,txt_path,path,size_h=500,size_w=500)

3. 影像拼接

几张图拼接为一张图的代码
newgroup_selected.txt中为随机选择分好组的影像名称，每一行为一组，格式如下，拼接按照这个txt文件中的分组进行

['030', '428', '528', '037']

import os
from PIL import Image

# Stitch groups of tiles into IMAGE_ROW x IMAGE_COLUMN mosaics.  Every line of
# the group file lists the quoted tile names of one mosaic, e.g.
# ['030', '428', '528', '037'], in row-major order.
IMAGES_PATH=r'G:/NWPU VHR-10 dataset/cut/img/'
IMAGES_FORMAT = ['.jpg', '.tif']  # image formats
IMAGE_SIZE = 500  # edge length of every tile inside the mosaic
IMAGE_ROW = 2  # number of tile rows in the mosaic
IMAGE_COLUMN = 2  # number of tile columns in the mosaic

txt_path=r'G:/NWPU VHR-10 dataset/newgroup_selected.txt'   # group file

with open(txt_path) as open_txt:
    for ii in range(651, 701):       # one mosaic per index, saved as <index>.jpg
        line=open_txt.readline()
        if not line:  # group file exhausted before the index range
            break
        IMAGE_SAVE_PATH = r'G:/NWPU VHR-10 dataset/cut/stitcherImg/{}.jpg'.format(ii)
        # Odd-numbered pieces of a split on the quote character are the names.
        aa=line.split("'")[1::2][:IMAGE_ROW * IMAGE_COLUMN]
        to_image = Image.new('RGB', (IMAGE_COLUMN * IMAGE_SIZE, IMAGE_ROW * IMAGE_SIZE))
        for row in range(IMAGE_ROW):
            for column in range(IMAGE_COLUMN):
                tile_name = aa[IMAGE_COLUMN * row + column]
                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the
                # same filter under its current name.
                from_image = Image.open(IMAGES_PATH + tile_name + '.jpg').resize(
                    (IMAGE_SIZE, IMAGE_SIZE), Image.LANCZOS)
                to_image.paste(from_image, (column * IMAGE_SIZE, row * IMAGE_SIZE))
        to_image.save(IMAGE_SAVE_PATH)  # write the mosaic

xml拼接

# 生成拼接图像的xml文件
import os
import xml.etree.ElementTree as ET


def indent(elem, level=0):
    """Insert newline/space whitespace into ``elem`` in place for pretty output.

    Each nesting level is indented by one space.  Existing text or tail
    content that is not pure whitespace is left untouched.
    """
    pad = "\n" + " " * level

    def is_blank(value):
        return not value or not value.strip()

    if not len(elem):
        # Leaf element: only its tail needs whitespace, and not at the root.
        if level and is_blank(elem.tail):
            elem.tail = pad
        return

    if is_blank(elem.text):
        elem.text = pad + " "
    if is_blank(elem.tail):
        elem.tail = pad
    last_child = None
    for last_child in elem:
        indent(last_child, level + 1)
    # The last child's tail is pulled back to the parent's indentation so the
    # closing tag lines up with the opening one.
    if is_blank(last_child.tail):
        last_child.tail = pad



src_path = r'/home/DOTA/xml/'
txt_path = r'/home/DOTA/newgroup_selected.txt'
open_txt = open(txt_path)
# read_txt=open_txt.readline()
for ii in range(3000, 3120):
    print("处理标签{}".format(ii))
    num = 0
    aa = []
    line = open_txt.readline()  # 一组拼接图像的xml文件进行合成
    # print(line)
    annotation = ET.Element("annotation")
    filename = ET.SubElement(annotation, "filename")
    filename.text = "P{}.png".format(ii)
    source = ET.SubElement(annotation, "source")
    databse = ET.SubElement(source, "databse")
    databse.text = "DOTA"
    size = ET.SubElement(annotation, "size")  # SubElement 子节点
    width = ET.SubElement(size, "width")  # SubElement 子节点
    width.text = "2000"
    height = ET.SubElement(size, "height")  # SubElement 子节点
    height.text = "2000"
    depth = ET.SubElement(size, "depth")  # SubElement 子节点
    depth.text = "3"
    segmented = ET.SubElement(annotation, "segmented")  # SubElement 子节点
    segmented.text = "0"
    a = line.split("'")
    b, c, d, e = a[1], a[3], a[5], a[7]
    aa.append(b)
    aa.append(c)
    aa.append(d)
    aa.append(e)
    for i in aa:
        if num == 0:
            target_dom = ET.parse(os.path.join(src_path, i) + ".xml")
            target_root = target_dom.getroot()  # 数据内存地址
            for i in target_root.iter('object'):
                object = ET.SubElement(annotation, "object")
                for j in i:

                    if j.tag == 'name':
                        j_tag = ET.SubElement(object, j.tag)  # SubElement 子节点
                        j_tag.text = j.text
                    if j.tag == 'pose':
                        j_tag = ET.SubElement(object, j.tag)  # SubElement 子节点
                        j_tag.text = j.text
                    if j.tag == 'bndbox':
                        j_tag = ET.SubElement(object, j.tag)  # SubElement 子节点
                        for m in j:
                            if m.tag == 'xmin':
                                m_tag = ET.SubElement(j_tag, m.tag)  # SubElement 子节点
                                m_tag.text = str(int(m.text) // 2)
                            if m.tag == 'ymin':
                                m_tag = ET.SubElement(j_tag, m.tag)  # SubElement 子节点
                                m_tag.text = str(int(m.text) // 2)
                            if m.tag == 'xmax':
                                m_tag = ET.SubElement(j_tag, m.tag)  # SubElement 子节点
                                m_tag.text = str(int(m.text) // 2)
                            if m.tag == 'ymax':
                                m_tag = ET.SubElement(j_tag, m.tag)  # SubElement 子节点
                                m_tag.text = str(int(m.text) // 2)

            num += 1
        elif num == 1:
            target_dom = ET.parse(os.path.join(src_path, i) + ".xml")
            target_root = target_dom.getroot()  # 数据内存地址
            for i in target_root.iter('object'):
                object = ET.SubElement(annotation, "object")
                for j in i:

                    if j.tag == 'name':
                        j_tag = ET.SubElement(object, j.tag)  # SubElement 子节点
                        j_tag.text = j.text
                    if j.tag == 'pose':
                        j_tag = ET.SubElement(object, j.tag)  # SubElement 子节点
                        j_tag.text = j.text
                    if j.tag == 'bndbox':
                        j_tag = ET.SubElement(object, j.tag)  # SubElement 子节点
                        for m in j:
                            if m.tag == 'xmin':
                                m_tag = ET.SubElement(j_tag, m.tag)  # 构建SubElement 子节点
                                m_tag.text = str((int(m.text) // 2) + 1000)
                            if m.tag == 'ymin':
                                m_tag = ET.SubElement(j_tag, m.tag)  # 构建SubElement 子节点
                                m_tag.text = str(int(m.text) // 2)
                            if m.tag == 'xmax':
                                m_tag = ET.SubElement(j_tag, m.tag)  # 构建SubElement 子节点
                                m_tag.text = str((int(m.text) // 2) + 1000)
                            if m.tag == 'ymax':
                                m_tag = ET.SubElement(j_tag, m.tag)  # 构建SubElement 子节点
                                m_tag.text = str(int(m.text) // 2)
            num += 1
        elif num == 2:
            target_dom = ET.parse(os.path.join(src_path, i) + ".xml")
            target_root = target_dom.getroot()  # 数据内存地址
            for i in target_root.iter('object'):
                object = ET.SubElement(annotation, "object")
                for j in i:

                    if j.tag == 'name':
                        j_tag = ET.SubElement(object, j.tag)  # SubElement 子节点
                        j_tag.text = j.text
                    if j.tag == 'pose':
                        j_tag = ET.SubElement(object, j.tag)  # SubElement 子节点
                        j_tag.text = j.text
                    if j.tag == 'bndbox':
                        j_tag = ET.SubElement(object, j.tag)  # SubElement 子节点
                        for m in j:
                            if m.tag == 'xmin':
                                m_tag = ET.SubElement(j_tag, m.tag)  # 构建SubElement 子节点
                                m_tag.text = str(int(m.text) // 2)
                            if m.tag == 'ymin':
                                m_tag = ET.SubElement(j_tag, m.tag)  # 构建SubElement 子节点
                                m_tag.text = str(int(m.text) // 2 + 1000)
                            if m.tag == 'xmax':
                                m_tag = ET.SubElement(j_tag, m.tag)  # 构建SubElement 子节点
                                m_tag.text = str(int(m.text) // 2)
                            if m.tag == 'ymax':
                                m_tag = ET.SubElement(j_tag, m.tag)  # 构建SubElement 子节点
                                m_tag.text = str(int(m.text) // 2 + 1000)
            num += 1
        elif num == 3:
            target_dom = ET.parse(os.path.join(src_path, i) + ".xml")
            target_root = target_dom.getroot()  # 数据内存地址
            for i in target_root.iter('object'):
                object = ET.SubElement(annotation, "object")
                for j in i:

                    if j.tag == 'name':
                        j_tag = ET.SubElement(object, j.tag)  # SubElement 子节点
                        j_tag.text = j.text
                    if j.tag == 'pose':
                        j_tag = ET.SubElement(object, j.tag)  # SubElement 子节点
                        j_tag.text = j.text
                    if j.tag == 'bndbox':
                        j_tag = ET.SubElement(object, j.tag)  # SubElement 子节点
                        for m in j:
                            if m.tag == 'xmin':
                                m_tag = ET.SubElement(j_tag, m.tag)  # 构建SubElement 子节点
                                m_tag.text = str(int(m.text) // 2 + 1000)
                            if m.tag == 'ymin':
                                m_tag = ET.SubElement(j_tag, m.tag)  # 构建SubElement 子节点
                                m_tag.text = str(int(m.text) // 2 + 1000)
                            if m.tag == 'xmax':
                                m_tag = ET.SubElement(j_tag, m.tag)  # 构建SubElement 子节点
                                m_tag.text = str(int(m.text) // 2 + 1000)
                            if m.tag == 'ymax':
                                m_tag = ET.SubElement(j_tag, m.tag)  # 构建SubElement 子节点
                                m_tag.text = str(int(m.text) // 2 + 1000)

    indent(annotation)
    et = ET.ElementTree(annotation)
    et.write(r"/home/DOTA/cut/selected_stitched_xml/P{}.xml".format(ii), encoding="utf-8",
             xml_declaration=True)

4. 训练集验证集划分（包含划分到指定文件夹）

随机划分训练集和验证集，生成以下4个txt文件，其内容为图片名称。代码以标签文件为基础进行划分（需根据标签文件是txt还是xml修改代码中的后缀）；当然也可以根据图片文件夹划分，但需要自己对代码加以修改。
在这里插入图片描述

import os
import random

# Randomly split the label files of a dataset into train / val / test lists and
# write four text files (trainval.txt, test.txt, train.txt, val.txt), each
# holding one sample name per line without the file extension.

random.seed(10)  # Fixed seed so the same file list always yields the same split.

xmlFilePath = r'/home/dataset/NWPU VHR-10/ground truth'  # Folder holding the label files.
saveBasePath = r"/home/dataset/NWPU VHR-10/"  # Folder that receives the four list files.

# Fraction of all samples that goes to train+val; the remainder becomes the
# test set (e.g. 0.9 would leave 10% for testing).
trainval_percent = 1
# Fraction of train+val that is used for training; the remainder becomes the
# validation set (1 would mean no validation samples).
train_percent = 0.8

# Extension of the label files to split on; use ".xml" for XML annotations.
label_suffix = ".txt"

# os.listdir returns entries in arbitrary order, so sort them: otherwise the
# seeded sampling below could select different files from one run or machine
# to the next even though the seed is fixed.
total_xml = sorted(
    name for name in os.listdir(xmlFilePath) if name.endswith(label_suffix)
)

num = len(total_xml)  # Total number of label files.
indices = list(range(num))  # Sample indices 0 .. num-1.
tv = int(num * trainval_percent)  # Number of train+val samples.
tr = int(tv * train_percent)  # Number of training samples.
trainval = random.sample(indices, tv)  # Indices assigned to train+val.
train = random.sample(trainval, tr)  # Indices assigned to train.

# Sets give O(1) membership tests in the loop below; the lists are kept for
# sampling because random.sample requires a sequence.
trainval_set = set(trainval)
train_set = set(train)

print("train and validation set size:", tv)
print("train set size:", tr)

# The context manager closes all four files even if writing fails part-way.
with open(os.path.join(saveBasePath, 'trainval.txt'), 'w') as ftrainval, \
        open(os.path.join(saveBasePath, 'test.txt'), 'w') as ftest, \
        open(os.path.join(saveBasePath, 'train.txt'), 'w') as ftrain, \
        open(os.path.join(saveBasePath, 'val.txt'), 'w') as fval:
    for i in indices:
        # Sample name without the label extension, one per line.
        name = total_xml[i][:-len(label_suffix)] + '\n'
        if i in trainval_set:
            ftrainval.write(name)
            if i in train_set:
                ftrain.write(name)
            else:
                fval.write(name)
        else:
            # Everything outside train+val belongs to the test set.
            ftest.write(name)

划分完成后，若是想要将train和val图片放置于不同文件夹，运行以下代码：

'''
功能：复制图片到指定文件夹
'''
import os
import random
import shutil



def newaddimg(imgtxt='/home/dataset/NWPU VHR-10/train.txt'):
    """Read the image names listed in a split file.

    Args:
        imgtxt: Path of a text file holding one image name (without
            extension) per line. Defaults to the train list.

    Returns:
        A list of the names in file order, with the trailing newline removed.
        Blank lines are skipped because an empty name cannot refer to an
        image.
    """
    # The context manager closes the file; the original left it open.
    with open(imgtxt) as list_file:
        names = [line.strip('\n') for line in list_file]
    return [name for name in names if name]



if __name__ == '__main__':
    # Copy every image named in the list returned by newaddimg() from the
    # image folder into a separate destination folder.
    imgpath = r'/home/dataset/NWPU VHR-10/images/'
    newimage_file = r'/home/dataset/NWPU VHR-10/train2017/'

    # exist_ok=True creates the folder when missing and is a no-op when it is
    # already there, replacing the separate existence check.
    os.makedirs(newimage_file, exist_ok=True)

    newimg = newaddimg()
    for image_name in newimg:
        # The list stores names without extension; the images are .jpg files.
        shutil.copy(os.path.join(imgpath, image_name + '.jpg'), newimage_file)

暮已深

关注

3
点赞
踩
12

收藏

觉得还不错? 一键收藏
0
评论
【目标检测】一些数据集处理常用代码

一些数据处理常用代码在训练目标检测模型时，不同的数据集标签格式不一，不同模型要求的数据格式不一，最近一直在各种格式转换，用到的代码(大多数是找的别人的经试验能用的，部分是根据自己需求写的)在这儿总结一下。文章目录一些数据处理常用代码1.标签数据格式转换(1).txt转xml【DOTA->VOC】(2).标签转为json格式【NWPU/DIOR->COCO】2. 影像及标签裁剪(1).NWPU VHR(2).DOTA3. 影像拼接1.标签数据格式转换(1).txt转xml【DOTA->
复制链接

扫一扫