Abstract
YOLO格式的数据集不同于COCO和VOC数据集的格式, 它通过txt文本保存gt框的坐标. 如果只将数据集用于基于box的目标检测, 这种格式显然是一种不错的选择, 管理起来很方便. 但如果想要为这个数据集增添其他信息, 比如语义信息、点云信息等, 用txt保存就显得过于臃肿了, 还是用xml或json格式保存比较方便.
Introduction
1 yolo数据集介绍
yolo格式的数据集支持两种指定train,val,test划分的方式: 一种是基于文本的划分方式, 一种是基于文件夹的划分方式.
我们先来看第一种方式,基于文本进行划分如下面的tree图所示
首先会创建train.txt, test.txt, val.txt等文件用来保存图片路径. 这些图片可以默认放在根目录的images文件夹下, 只需要将图片的路径保存到txt文件中, 便可以被yolov5读取到. 即使写入了错误的路径也不用担心, 在运行代码时yolov5会提示哪些图片文件是缺失的. 如果使用的不是通用数据集(例如coco2017, voc2012等), 建议使用这种方式灵活地管理数据集.
优点:可以很好的管理train,val,test文件的划分.一张图片路径可以出现在多个txt文件中,减少了存放图片的空间.
缺点:文本文件路径是固定的,所以数据集的位置也需要相对固定
#基于文本划分数据集
dataset
├── images
│ ├── 001.jpg
│ ├── 002.jpg
│ ├── 003.jpg
│ └── 004.jpg
│
├── labels
│ ├── 001.txt
│ ├── 002.txt
│ ├── 003.txt
│ └── 004.txt
│
└── ImageSets
    ├── train.txt
    ├── trainval.txt
    ├── test.txt
    └── val.txt
基于文件夹划分的方式如下树状图所示
它不再需要txt文件指定数据集的划分, 只需要在images下面新建train,test等文件夹, 并将对应的图片放进去即可, 比较方便. 但是这种方式无法再对数据集进行灵活的划分; 同时, 如果一张图片同时存在于train和val中, 就需要在各自的文件夹中各放一份, 占用了更多的存储空间.
通用数据集使用这种格式的数据集是比较友好的
#基于文件夹划分数据集
dataset
├── images
│   ├── train
│   │   ├── 001.jpg
│   │   └── 002.jpg
│   │
│   └── test
│       ├── 003.jpg
│       └── 004.jpg
│
└── labels
    ├── train
    │   ├── 001.txt
    │   └── 002.txt
    │
    └── test
        ├── 003.txt
        └── 004.txt
2, 数据集转换工具
由于大部分的数据集都是voc和coco格式的,因此格式的转换让人头疼,在这里我提供了3套工具, 分别是voc转换coco格式工具, coco转换voc格式工具, voc转换yolo格式工具.
这些代码已经实操很多遍了,基本没有问题
使用这个数据集工具有几个点需要注意
1, 这些工具配置参数均在最前面
2,有些coco数据集里面写图片名字的时候加了.jpg,因此需要在工具中也加上.jpg,在使用过程中会发现这一问题
#coco数据集转voc数据集
from pycocotools.coco import COCO
import skimage.io as io
import matplotlib.pyplot as plt
import pylab,os,cv2,shutil
from lxml import etree, objectify
from tqdm import tqdm
import random
from PIL import Image
import os
# Default matplotlib figure size (width, height) used when previewing images.
pylab.rcParams['figure.figsize'] = (8.0, 10.0)
# ---- Settings to edit ----
# Root folder of the VOC-style output; JPEGImages/, Annotations/ and
# ImageSets/Main/ are created underneath it.
CKdir="./voc2017"
# Category names to export; annotations of any other category are skipped.
CK5cats = [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
'hair drier', 'toothbrush' ]
# Root folder of the COCO input: images are read from <dataDir>/<dataType>/ and
# annotations from <dataDir>/annotations/.
dataDir='./coco2017'
# Annotation file name template; "{}" is filled with an entry of dataTypes.
instance="instances_{}.json"
# COCO subsets to convert.
dataTypes=['val2017',"train2017"]
# ---- Derived paths (normally no need to edit below this line) ----
CKimg_dir=os.path.join(CKdir,"JPEGImages")
CKanno_dir=os.path.join(CKdir,"Annotations")
# Split mode: False writes one list file per COCO subset via split();
# True makes a random train/val/test split via split_traintest().
trans_by_self=False
def mkr(dir):
    """Create directory ``dir`` (and any missing parents) if it is not there yet.

    Args:
        dir: Path of the directory to create. The parameter name shadows the
            builtin but is kept so keyword callers keep working.
    """
    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the same directory.
    os.makedirs(dir, exist_ok=True)
def showimg(coco,dataType,img,CK5Ids):
    """Show one COCO image in a matplotlib window with its annotations drawn.

    Args:
        coco: Loaded COCO annotation object.
        dataType: Subset folder name under ``dataDir``.
        img: COCO image record; ``file_name`` and ``id`` are read.
        CK5Ids: Category ids whose annotations should be drawn.
    """
    image_path = '%s/%s/%s' % (dataDir, dataType, img['file_name'])
    pixels = io.imread(image_path)
    plt.imshow(pixels)
    plt.axis('off')
    selected_ids = coco.getAnnIds(imgIds=img['id'], catIds=CK5Ids, iscrowd=None)
    coco.showAnns(coco.loadAnns(selected_ids))
    plt.show()
def save_annotations(dataType,filename,objs):
    """Copy one COCO image into the VOC image folder and write its VOC XML file.

    Args:
        dataType: Subset folder name under ``dataDir``.
        filename: Image file name inside that subset folder.
        objs: Boxes laid out as ``[name, score, xmin, ymin, xmax, ymax]``;
            the value at index 1 is not written to the XML.

    Raises:
        ValueError: If OpenCV cannot decode the image.
    """
    # splitext copes with any extension length (".jpeg", ".png"), unlike
    # slicing off the last three characters.
    annopath = os.path.join(CKanno_dir, os.path.splitext(filename)[0] + ".xml")
    img_path = dataDir + "/" + dataType + "/" + filename
    dst_path = CKimg_dir + "/" + filename
    img = cv2.imread(img_path)
    # NOTE: an optional filter that skipped non-RGB images (checked via
    # PIL's Image.mode) used to be sketched here; it was never enabled.
    shutil.copy(img_path, dst_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError("cannot decode image: " + img_path)

    E = objectify.ElementMaker(annotate=False)
    anno_tree = E.annotation(
        E.folder('1'),
        E.filename(filename),
        E.source(
            E.database('CKdemo'),
            E.annotation('VOC'),
            E.image('CK')
        ),
        E.size(
            E.width(img.shape[1]),
            E.height(img.shape[0]),
            E.depth(img.shape[2])
        ),
        E.segmented(0)
    )
    for obj in objs:
        # One <object> element per box; the same element maker is reused.
        anno_tree.append(
            E.object(
                E.name(obj[0]),
                E.pose(),
                E.truncated("0"),
                E.difficult(0),
                E.bndbox(
                    E.xmin(obj[2]),
                    E.ymin(obj[3]),
                    E.xmax(obj[4]),
                    E.ymax(obj[5])
                )
            )
        )
    etree.ElementTree(anno_tree).write(annopath, pretty_print=True)
def showbycv(coco,dataType,img,classes,CK5Ids):
    """Convert the annotations of one COCO image to VOC and save a preview.

    Collects the boxes of the wanted categories, hands them to
    ``save_annotations`` and writes the image with the boxes drawn on it
    to ``tmp.jpg`` (overwritten on every call).

    Args:
        coco: Loaded COCO annotation object.
        dataType: Subset folder name under ``dataDir``.
        img: COCO image record; ``file_name`` and ``id`` are read.
        classes: Mapping from category id to category name.
        CK5Ids: Category ids used to filter the annotation query.

    Returns:
        The image file name without its extension.
    """
    filename = img['file_name']
    filepath = '%s/%s/%s' % (dataDir, dataType, filename)
    I = cv2.imread(filepath)
    annIds = coco.getAnnIds(imgIds=img['id'], catIds=CK5Ids, iscrowd=None)
    objs = []
    for ann in coco.loadAnns(annIds):
        name = classes[ann['category_id']]
        if name not in CK5cats or 'bbox' not in ann:
            continue
        # COCO boxes are [x, y, width, height]; VOC wants corner coordinates.
        bbox = ann['bbox']
        xmin = int(bbox[0])
        ymin = int(bbox[1])
        xmax = int(bbox[2] + bbox[0])
        ymax = int(bbox[3] + bbox[1])
        objs.append([name, 1.0, xmin, ymin, xmax, ymax])
        cv2.rectangle(I, (xmin, ymin), (xmax, ymax), (255, 0, 0))
        cv2.putText(I, name, (xmin, ymin), 3, 1, (0, 0, 255))
    save_annotations(dataType, filename, objs)
    cv2.imwrite("tmp.jpg", I)
    # splitext instead of [:-4] so extensions of any length are stripped correctly.
    return os.path.splitext(filename)[0]
def catid2name(coco):
    """Build a lookup table from COCO category id to category name.

    Args:
        coco: Object whose ``dataset['categories']`` is a list of records
            with ``id`` and ``name`` keys.

    Returns:
        Dict mapping each category id to its name.
    """
    return {entry['id']: entry['name'] for entry in coco.dataset['categories']}
def get_CK5():
    """Convert every image of every configured COCO subset to VOC format.

    For each entry of ``dataTypes`` the matching annotation file is loaded
    and each image is passed to ``showbycv``, which writes the XML and
    copies the image.
    """
    for dataType in dataTypes:
        annFile = ('{}/annotations/' + instance).format(dataDir, dataType)
        coco = COCO(annFile)
        CK5Ids = coco.getCatIds(catNms=CK5cats)
        print(len(CK5cats))
        classes = catid2name(coco)
        print(CK5Ids, classes)
        # getImgIds is the pycocotools spelling; the snake_case get_img_ids
        # used before is not provided by upstream pycocotools (it appears
        # to come from a fork -- TODO confirm which package was installed).
        img_ids = coco.getImgIds()
        print(len(img_ids))
        for imgId in tqdm(img_ids):
            img = coco.loadImgs(imgId)[0]
            showbycv(coco, dataType, img, classes, CK5Ids)
def split():
    """Write one image list per COCO subset to ``<CKdir>/ImageSets/Main``.

    The file names found in each subset folder are written, one per line,
    to ``<dataType>.txt``. Every image listed in the annotation file that
    is missing from the folder is reported with a "not found" message.

    NOTE(review): names are written with their file extension, whereas
    ``split_traintest`` writes them without -- confirm which form the
    consumer of these lists expects.
    """
    imgnames = {}
    for dataType in dataTypes:
        imgnames[dataType] = os.listdir(os.path.join(dataDir, dataType))
        print(len(imgnames[dataType]))

    for dataType in dataTypes:
        annFile = ('{}/annotations/' + instance).format(dataDir, dataType)
        coco = COCO(annFile)
        # A set makes each membership test O(1) instead of scanning the list.
        present = set(imgnames[dataType])
        # getImgIds is the pycocotools spelling of the former get_img_ids call.
        for imgId in tqdm(coco.getImgIds()):
            # Look up this image; the old code passed the whole id list and
            # therefore inspected the first image on every iteration.
            filename = coco.loadImgs(imgId)[0]["file_name"]
            if filename not in present:
                print("not found", filename)

    maindir = os.path.join(CKdir, "ImageSets", "Main")
    mkr(maindir)
    for dataType in dataTypes:
        with open(os.path.join(maindir, dataType + ".txt"), "w") as f:
            for s in imgnames[dataType]:
                f.write(s + "\n")
# Randomly split the converted images into train / val / test lists.
def split_traintest(trainratio=0.7,valratio=0.2,testratio=0.1):
    """Shuffle the images in ``CKimg_dir`` and write VOC split lists.

    Writes ``train.txt``, ``val.txt``, ``trainval.txt`` and ``test.txt``
    (image names without extension) to ``<CKdir>/ImageSets/Main``.

    Args:
        trainratio: Fraction of images assigned to train.
        valratio: Fraction of images assigned to val.
        testratio: Unused; the test set is whatever remains after train
            and val. Kept for interface compatibility.
    """
    files = os.listdir(CKimg_dir)
    random.shuffle(files)
    total = len(files)
    trains, vals, trainvals, tests = [], [], [], []
    for i, name in enumerate(files):
        if i < trainratio * total:
            trains.append(name)
            trainvals.append(name)
        elif i < (trainratio + valratio) * total:
            vals.append(name)
            trainvals.append(name)
        else:
            tests.append(name)

    maindir = os.path.join(CKdir, "ImageSets", "Main")
    mkr(maindir)
    splits = (("train", trains), ("val", vals), ("trainval", trainvals), ("test", tests))
    for split_name, names in splits:
        with open(os.path.join(maindir, split_name + ".txt"), "w") as f:
            for name in names:
                # splitext leaves extension-less names intact; slicing at
                # rfind(".") cut off their last character.
                f.write(os.path.splitext(name)[0] + "\n")
    print("spliting done")
if __name__=="__main__":
    # Make sure both output folders exist before anything is written.
    for out_dir in (CKimg_dir, CKanno_dir):
        mkr(out_dir)
    if trans_by_self:
        # Convert first, then split the converted images randomly.
        get_CK5()
        split_traintest()
    else:
        # Keep COCO's own subsets: write the lists, then convert.
        print("split")
        split()
        get_CK5()
#voc转coco数据集
import sys
import os
import shutil
import numpy as np
import json
import xml.etree.ElementTree as ET
# First id handed out to bounding-box annotations.
START_BOUNDING_BOX_ID = 1
# Category name -> id table. It does not need to be filled in advance:
# convert() adds every category it meets in the XML files.
PRE_DEFINE_CATEGORIES = {}
# If necessary, pre-define category and its id
# PRE_DEFINE_CATEGORIES = {"aeroplane": 1, "bicycle": 2, "bird": 3, "boat": 4,
# "bottle":5, "bus": 6, "car": 7, "cat": 8, "chair": 9,
# "cow": 10, "diningtable": 11, "dog": 12, "horse": 13,
# "motorbike": 14, "person": 15, "pottedplant": 16,
# "sheep": 17, "sofa": 18, "train": 19, "tvmonitor": 20}
# ---- Settings to edit ----
# Image sets to convert; one <set>.txt per entry is looked up in ImageSets/Main.
sets = ['train','test','val','trainval']
# Output root of the COCO-style dataset.
coco_path="./Pest24_coco"
# Input root of the VOC-style dataset (Annotations/, JPEGImages/, ImageSets/).
rootpath="./Pest24_voc"
# Used only when the list files are missing: share of trainval that goes to train.
train_percent = 0.9
# Used only when the list files are missing: share of all samples that goes to trainval.
trainval_percent = 0.7
#===================================================
def get(root, name):
    """Return the list of sub-elements of ``root`` that ``findall(name)`` matches."""
    return root.findall(name)
def get_and_check(root, name, length):
    """Find the ``name`` sub-elements of ``root`` and validate how many there are.

    Args:
        root: Element to search.
        name: Tag (or path) passed to ``findall``.
        length: Expected number of matches; values <= 0 disable the count check.

    Returns:
        The single matching element when ``length`` is 1, otherwise the
        list of matches.

    Raises:
        NotImplementedError: If nothing matches, or the count differs from
            a positive ``length``.
    """
    matches = root.findall(name)
    found = len(matches)
    if not found:
        raise NotImplementedError('Can not find %s in %s.'%(name, root.tag))
    if length > 0 and found != length:
        raise NotImplementedError('The size of %s is supposed to be %d, but is %d.'%(name, length, found))
    return matches[0] if length == 1 else matches
def convert(xml_list, xml_dir, json_file):
    """Convert a list of VOC XML annotation files into one COCO JSON file.

    Args:
        xml_list: Names of the XML files to convert; entries that do not
            contain "xml" are skipped.
        xml_dir: Folder that holds the XML files.
        json_file: Path of the JSON file to write.

    Returns:
        None.

    Note:
        New categories are added to the module-level
        ``PRE_DEFINE_CATEGORIES`` dict, so a category keeps the same id
        across successive calls.
    """
    json_dict = {"images":[],
                 "type": "instances",
                 "annotations": [],
                 "categories": []}
    # Deliberately the shared dict, not a copy (see Note above).
    categories = PRE_DEFINE_CATEGORIES
    bnd_id = START_BOUNDING_BOX_ID
    image_id = 1
    for line in xml_list:
        line = line.strip()
        if "xml" not in line:
            continue
        print("\r Processing {}".format(line),end="")
        root = ET.parse(os.path.join(xml_dir, line)).getroot()

        # Fall back to the XML file's own stem when <filename> is absent or empty.
        name_node = root.find('filename')
        if name_node is not None and name_node.text:
            filename = name_node.text
        else:
            filename = os.path.splitext(line)[0]
        # Names that already end in ".jpg" are accepted as they are. The old
        # follow-up check compared the wrong character and so aborted on
        # every such name.
        if not filename.endswith(".jpg"):
            filename += ".jpg"

        # Incremented before use, so the first image gets id 2 (kept as is).
        image_id += 1
        size = get_and_check(root, 'size', 1)
        width = int(get_and_check(size, 'width', 1).text)
        height = int(get_and_check(size, 'height', 1).text)
        json_dict['images'].append({'file_name': filename,
                                    'height': height,
                                    'width': width,
                                    'id': image_id})

        for obj in get(root, 'object'):
            category = get_and_check(obj, 'name', 1).text
            if category not in categories:
                # One past the largest id in use: identical to len() for the
                # default empty table, but cannot collide with predefined
                # ids that start at 1.
                categories[category] = max(categories.values(), default=-1) + 1
            category_id = categories[category]

            bndbox = get_and_check(obj, 'bndbox', 1)
            # int(float(...)) also accepts coordinates written as "12.0".
            # The -1 shifts the top-left corner exactly as the original did.
            xmin = int(float(get_and_check(bndbox, 'xmin', 1).text)) - 1
            ymin = int(float(get_and_check(bndbox, 'ymin', 1).text)) - 1
            xmax = int(float(get_and_check(bndbox, 'xmax', 1).text))
            ymax = int(float(get_and_check(bndbox, 'ymax', 1).text))
            assert xmax > xmin, "%s: xmax must be greater than xmin" % line
            assert ymax > ymin, "%s: ymax must be greater than ymin" % line
            o_width = abs(xmax - xmin)
            o_height = abs(ymax - ymin)
            json_dict['annotations'].append({
                'area': o_width*o_height,
                'iscrowd': 0,
                'image_id': image_id,
                'bbox': [xmin, ymin, o_width, o_height],
                'category_id': category_id,
                'id': bnd_id,
                'ignore': 0,
                # The box itself as a 4-point polygon.
                'segmentation': [[xmin,ymin,xmin,ymax,xmax,ymax,xmax,ymin]],
            })
            bnd_id = bnd_id + 1
    print()

    for cate, cid in categories.items():
        json_dict['categories'].append({'supercategory': 'none', 'id': cid, 'name': cate})

    with open(json_file, 'w') as w:
        json.dump(json_dict, w)
def data_split(full_list, ratio, shuffle=False):
    """Split a sequence into a leading part of size ``int(len * ratio)`` and the rest.

    Args:
        full_list: Sequence to split (a list or a range).
        ratio: Fraction of the items that go into the first part.
        shuffle: Shuffle a copy of the items before splitting.

    Returns:
        ``(first, second)``. When the input is empty or the first part
        would be empty, ``([], full_list)`` is returned.
    """
    n_total = len(full_list)
    offset = int(n_total * ratio)
    if n_total == 0 or offset < 1:
        return [], full_list
    if shuffle:
        # Imported here because this script's import block has no `random`.
        import random
        # Shuffle a copy: leaves the caller's sequence alone and also works
        # for immutable inputs such as range.
        full_list = list(full_list)
        random.shuffle(full_list)
    return full_list[:offset], full_list[offset:]


if __name__ == '__main__':
    total_xml = os.listdir(os.path.join(rootpath,"Annotations"))
    num = len(total_xml)
    print("一共有",num,"张被标注的图片")
    os.makedirs(os.path.join(coco_path,'annotations'), exist_ok=True)

    image_ids = {s: [] for s in sets}
    # Every requested set needs its own list file; the old loop tested
    # sets[0] on each iteration.
    main_dir = os.path.join(rootpath,"ImageSets","Main")
    com = all(os.path.exists(os.path.join(main_dir, s+".txt")) for s in sets)
    if com:
        for image_set in sets:
            print("find...",image_set)
            with open(rootpath+'/ImageSets/Main/%s.txt' % (image_set)) as list_fp:
                image_ids[image_set] = [stem+".xml" for stem in list_fp.read().strip().split()]
    else:
        sets = ['train','test','val','trainval']
        print("don't find Set in ImageSets,try to create it",sets)
        # Rebuild the table so it has a key for each of the four sets.
        image_ids = {s: [] for s in sets}
        list_num = range(num)
        trainval,test=data_split(list_num,trainval_percent)
        train,val=data_split(trainval,train_percent)
        trainval_idx = set(trainval)
        train_idx = set(train)
        for i in list_num:
            name = total_xml[i]
            if i in trainval_idx:
                image_ids['trainval'].append(name)
                if i in train_idx:
                    image_ids['train'].append(name)
                else:
                    image_ids['val'].append(name)
            else:
                image_ids['test'].append(name)
    print("get train and test")

    for s in sets:
        os.makedirs(os.path.join(coco_path, s), exist_ok=True)
    xml_dir = os.path.join(rootpath,'Annotations')
    for image_set in sets:
        json_file = os.path.join(coco_path,'annotations/instances_{}.json'.format(image_set))
        convert(image_ids[image_set], xml_dir, json_file)

    print("deal images")
    for image_set in sets:
        image_idss = image_ids[image_set]
        print("deal",image_set,len(image_idss))
        for i, image_id in enumerate(image_idss):
            print("\r",round(i/len(image_idss),4),end="")
            # Swap the annotation extension for the image extension.
            img_name = os.path.splitext(image_id)[0]+".jpg"
            shutil.copy(os.path.join(rootpath, 'JPEGImages', img_name),
                        os.path.join(coco_path, image_set, img_name))
        print()
    print("down")
#voc格式转yolo格式
import os
import random
import xml.etree.ElementTree as ET
import pickle
from os import listdir, getcwd
from os.path import join
import shutil
# ---- Settings to edit ----
# Image sets to convert; when their list files are missing under
# ImageSets/Main the script falls back to train/test/val/trainval.
sets = ['trainval']
# Class names; a class's index in this list is the id written to the label files.
classes =[ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog','horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ]
# Output root of the YOLO-style dataset (labels/, images/, ImageSets/).
savepath="./yolo0712"
# Input root of the VOC-style dataset (Annotations/, JPEGImages/, ImageSets/).
rootpath="./VOC2012"
# Used only when the list files are missing: share of trainval that goes to train.
train_percent = 0.9
# Used only when the list files are missing: share of all samples that goes to trainval.
trainval_percent = 0.7
# ---- End of settings ----
# Last path component of savepath; it prefixes each image path written to
# the list files. normpath drops trailing or doubled separators first.
savename = os.path.basename(os.path.normpath(savepath))
print(savename)
os.makedirs(savepath, exist_ok=True)
total_xml = os.listdir(os.path.join(rootpath,"Annotations"))
num = len(total_xml)
print("一共有",num,"张被标注的图片")
for sub_dir in ('/labels/', '/images/', '/ImageSets/'):
    os.makedirs(savepath+sub_dir, exist_ok=True)


def data_split(full_list, ratio, shuffle=False):
    """Split a sequence into a leading part of size ``int(len * ratio)`` and the rest.

    Args:
        full_list: Sequence to split (a list or a range).
        ratio: Fraction of the items that go into the first part.
        shuffle: Shuffle a copy of the items before splitting.

    Returns:
        ``(first, second)``. When the input is empty or the first part
        would be empty, ``([], full_list)`` is returned.
    """
    n_total = len(full_list)
    offset = int(n_total * ratio)
    if n_total == 0 or offset < 1:
        return [], full_list
    if shuffle:
        # Shuffle a copy: leaves the caller's sequence alone and also works
        # for immutable inputs such as range.
        full_list = list(full_list)
        random.shuffle(full_list)
    return full_list[:offset], full_list[offset:]


image_ids = {s: [] for s in sets}
# Every requested set needs its own list file; the old loop tested sets[0]
# on each iteration.
main_dir = os.path.join(rootpath,"ImageSets","Main")
com = all(os.path.exists(os.path.join(main_dir, s+".txt")) for s in sets)
if com:
    for image_set in sets:
        print("find...",image_set)
        with open(rootpath+'/ImageSets/Main/%s.txt' % (image_set)) as list_fp:
            image_ids[image_set] = list_fp.read().strip().split()
else:
    sets = ['train','test','val','trainval']
    print("don't find Set in ImageSets,try to create it",sets)
    # Rebuild the table for the four fallback sets; with the default
    # sets=['trainval'] the appends below used to raise KeyError.
    image_ids = {s: [] for s in sets}
    list_num = range(num)
    trainval,test=data_split(list_num,trainval_percent)
    train,val=data_split(trainval,train_percent)
    trainval_idx = set(trainval)
    train_idx = set(train)
    for i in list_num:
        # Annotation file name without its extension.
        name = os.path.splitext(total_xml[i])[0]
        if i in trainval_idx:
            image_ids['trainval'].append(name)
            if i in train_idx:
                image_ids['train'].append(name)
            else:
                image_ids['val'].append(name)
        else:
            image_ids['test'].append(name)
print("get train and test")
def convert(size, box):
    """Turn a pixel-space box into a normalised centre/size tuple.

    Args:
        size: ``(image_width, image_height)``.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels.

    Returns:
        ``(x_centre, y_centre, width, height)``, each divided by the
        matching image dimension.
    """
    x_scale = 1. / size[0]
    y_scale = 1. / size[1]
    centre_x = (box[0] + box[1]) / 2.0
    centre_y = (box[2] + box[3]) / 2.0
    box_w = box[1] - box[0]
    box_h = box[3] - box[2]
    return (centre_x * x_scale, centre_y * y_scale, box_w * x_scale, box_h * y_scale)
def convert_annotation(image_id):
    """Write the label file for one image from its VOC XML annotation.

    Reads ``<rootpath>/Annotations/<image_id>.xml`` and writes
    ``<savepath>/labels/<image_id>.txt`` with one line per kept object:
    ``class_index x_centre y_centre width height`` (normalised). Objects
    whose class is not in ``classes`` or that are flagged difficult are
    skipped.

    Args:
        image_id: Image name without extension.
    """
    xml_path = rootpath+'/Annotations/%s.xml' % (image_id)
    label_path = savepath+'/labels/%s.txt' % (image_id)
    # Context managers close both files even when parsing fails part-way.
    with open(xml_path) as in_file, open(label_path, 'w') as out_file:
        root = ET.parse(in_file).getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)
        for obj in root.iter('object'):
            # Treat a missing or empty <difficult> tag as "not difficult".
            difficult_node = obj.find('difficult')
            has_flag = difficult_node is not None and difficult_node.text
            difficult = int(difficult_node.text) if has_flag else 0
            cls = obj.find('name').text
            if cls not in classes or difficult == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects the order (xmin, xmax, ymin, ymax).
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
# Pass 1: write one list file per image set and convert each annotation.
for image_set in sets:
    image_idss=image_ids[image_set]
    print("deal",image_set,len(image_idss))
    # The context manager closes the list file even if a conversion fails.
    with open(savepath+"/ImageSets/"+'%s.txt' % (image_set), 'w') as list_file:
        for i, image_id in enumerate(image_idss):
            print("\r",round(i/len(image_idss),4),end="")
            list_file.write(savename+'/images/%s.jpg\n' % (image_id))
            convert_annotation(image_id)
        print()
print("deal images")
# Pass 2: copy the images. All sets share one images/ folder, so an image
# that belongs to several sets only needs to be copied once.
copied = set()
for image_set in sets:
    image_idss=image_ids[image_set]
    print("deal",image_set,len(image_idss))
    for i, image_id in enumerate(image_idss):
        print("\r",round(i/len(image_idss),4),image_id,end="")
        img_name=image_id+".jpg"
        if img_name in copied:
            continue
        shutil.copy(os.path.join(rootpath, 'JPEGImages', img_name),
                    os.path.join(savepath, 'images', img_name))
        copied.add(img_name)
    print()
print("down")
3 yaml文件配置
最后,我们在yolov5根目录下的data文件夹中配置数据集的yaml文件
如下所示
#如果是文本格式的话指定文本路径,如果是文件夹格式指定文件夹路径
train: ./root/ImageSets/trainval.txt  # 16551 images
#train: ./root/images/trainval
val: ./root/ImageSets/test.txt  # 4952 images
#val:./root/images/val
#class分类
nc: 20
names: [ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ]