"""
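Purpose of this script:
Automatically convert the annotation data produced by labelImg to YOLOv5 format and split the dataset.
author:@bjtu_huangyuxiang (please credit the author when reposting)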
"""
import xml.etree.ElementTree as ET
import os
import shutil
import random
# Class labels to export. An object's class id in the txt output is its index in
# this list; objects whose name is not listed here are skipped during conversion.
classes = ['negative_feeder_shoulder']
def convert(size, box):
    """Turn a pixel-space bounding box into normalized (x, y, w, h).

    Args:
        size: sequence whose first two items are the image width and height.
        box: sequence ordered as (xmin, xmax, ymin, ymax) -- note that both
            x values come before both y values.

    Returns:
        Tuple ``(x_center, y_center, width, height)`` where each value is
        multiplied by the reciprocal of the matching image dimension. A
        normalized width or height that reaches 1 or more is replaced by 0.99.
    """
    scale_x = 1. / size[0]
    scale_y = 1. / size[1]
    xmin, xmax, ymin, ymax = box[0], box[1], box[2], box[3]

    # The "- 1" offset is kept exactly as in the original script
    # (presumably compensating for 1-based pixel coordinates -- confirm
    # before changing it).
    center_x = ((xmin + xmax) / 2.0 - 1) * scale_x
    center_y = ((ymin + ymax) / 2.0 - 1) * scale_y
    width = (xmax - xmin) * scale_x
    height = (ymax - ymin) * scale_y

    # Keep extents strictly below 1.
    width = 0.99 if width >= 1 else width
    height = 0.99 if height >= 1 else height
    return (center_x, center_y, width, height)
def convert_annotation(xmlpath, outpath, datasetpath=None, img_path=None):
    """Convert labelImg XML annotations to txt label files, then split the dataset.

    For every ``*.xml`` file found in ``xmlpath`` a ``<stem>.txt`` file is
    written to ``outpath``. Each kept object becomes one line of the form
    ``<class_id> <x> <y> <w> <h>``, where the four numbers come from
    ``convert``. Objects whose name is not in the module-level ``classes``
    list, or whose ``difficult`` flag is 1, are skipped.

    Args:
        xmlpath: directory containing the XML annotation files.
        outpath: directory the txt files are written to (created if missing).
        datasetpath: output root passed on to ``train_test_move``. When
            omitted, a module-level ``datasetpath`` is used if one exists.
        img_path: image directory passed on to ``train_test_move``. When
            omitted, a module-level ``img_path`` is used if one exists.

    Returns:
        None. If no dataset/image path can be resolved the split step is
        skipped instead of failing with a NameError.
    """
    os.makedirs(outpath, exist_ok=True)

    # Only real annotation files are converted: any other directory entry
    # would fail XML parsing and produce a wrongly named txt file.
    xml_names = [name for name in os.listdir(xmlpath)
                 if name.lower().endswith('.xml')]
    print(xml_names)

    for name in xml_names:
        xmlfile = os.path.join(xmlpath, name)
        print(xmlfile)
        txtname = os.path.splitext(name)[0] + '.txt'
        print(txtname)
        txtfile = os.path.join(outpath, txtname)

        # Parse before touching the output so a malformed XML file does not
        # leave an empty label file behind.
        with open(xmlfile, "r", encoding='UTF-8') as in_file:
            root = ET.parse(in_file).getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        with open(txtfile, "w", encoding='UTF-8') as out_file:
            for obj in root.iter('object'):
                cls = obj.find('name').text
                # A missing or empty <difficult> tag is treated as "not difficult".
                difficult = int(obj.findtext('difficult') or 0)
                if cls not in classes or difficult == 1:
                    continue
                cls_id = classes.index(cls)
                xmlbox = obj.find('bndbox')
                # convert() expects the order (xmin, xmax, ymin, ymax).
                b = (float(xmlbox.find('xmin').text),
                     float(xmlbox.find('xmax').text),
                     float(xmlbox.find('ymin').text),
                     float(xmlbox.find('ymax').text))
                bb = convert((w, h), b)
                out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

    print('共计%s个文件转换完成' % len(xml_names))

    # The split step used to read these two names straight from module
    # globals; keep that as a fallback so existing two-argument calls behave
    # the same, while callers that import this function can pass them in.
    if datasetpath is None:
        datasetpath = globals().get('datasetpath')
    if img_path is None:
        img_path = globals().get('img_path')
    if datasetpath is None or img_path is None:
        print('dataset split skipped: datasetpath/img_path not provided')
        return
    train_test_move(outpath, datasetpath, img_path)
def _find_image(img_path, stem):
    """Return the path of the image in ``img_path`` whose base name is ``stem``."""
    # ".jpg" is tried first because it is the only extension the script
    # originally supported.
    for ext in ('.jpg', '.jpeg', '.png', '.bmp'):
        candidate = os.path.join(img_path, stem + ext)
        if os.path.isfile(candidate):
            return candidate
    raise FileNotFoundError(
        'no image found for label %r in %r' % (stem, img_path))


def train_test_move(outpath, datasetpath, img_path, train_ratio=0.8):
    """Randomly split label/image pairs into train, valid and test folders.

    The ``.txt`` files in ``outpath`` are shuffled; the first
    ``round(n * train_ratio)`` go to ``train`` and the remainder to ``valid``.
    The ``test`` folder is created but receives no files. Each label is
    copied to ``<datasetpath>/<subset>/labels`` and the image with the same
    base name to ``<datasetpath>/<subset>/images``.

    Args:
        outpath: directory containing the txt label files.
        datasetpath: root directory the subset folders are created in.
        img_path: directory containing the images.
        train_ratio: fraction of the files assigned to ``train`` (default 0.8).

    Raises:
        ValueError: if ``train_ratio`` is outside [0, 1].
        FileNotFoundError: if a label has no matching image; this is checked
            before the label is copied so no unpaired label is left behind.
    """
    if not 0 <= train_ratio <= 1:
        raise ValueError('train_ratio must be between 0 and 1, got %r' % (train_ratio,))

    # Ignore anything in the label folder that is not a label file.
    label_files = [name for name in os.listdir(outpath)
                   if name.lower().endswith('.txt')]
    random.shuffle(label_files)  # unseeded: the split differs between runs

    n_train = round(len(label_files) * train_ratio)
    partitions = (
        ('train', label_files[:n_train]),
        ('valid', label_files[n_train:]),
        ('test', []),  # no test set is produced; the folder is still created
    )

    for subset, subset_files in partitions:
        subset_dir = os.path.join(datasetpath, subset)
        if not os.path.exists(subset_dir):
            print(subset_dir)
        labels_dir = os.path.join(subset_dir, "labels")
        images_dir = os.path.join(subset_dir, "images")
        # Create the sub-folders even when the subset folder already exists;
        # otherwise shutil.copy would write a plain file named "labels".
        os.makedirs(labels_dir, exist_ok=True)
        os.makedirs(images_dir, exist_ok=True)

        for label_name in subset_files:
            image_file = _find_image(img_path, os.path.splitext(label_name)[0])
            shutil.copy(os.path.join(outpath, label_name), labels_dir)
            shutil.copy(image_file, images_dir)

    print("succeess!")
" ------------------------ main --------------------------- "
if __name__ == "__main__":
    # Directory with the XML annotation files that get converted to txt.
    xmlpath = r'D:\desktop\merge_i_f_h_w_f\xml/'
    # Output directory for the txt labels; also the input of the dataset split.
    outpath = r'D:\desktop\merge_i_f_h_w_f/labels/'
    # Directory with the images used by the dataset split.
    img_path = r'D:\desktop\merge_i_f_h_w_f\images/'
    # Root directory the dataset split is written to.
    datasetpath = r'D:\desktop\merge_i_f_h_w_f/'
    # NOTE: only two arguments are passed; img_path and datasetpath must be
    # defined at module level before this call because the split step that
    # runs at the end of convert_annotation uses them.
    convert_annotation(xmlpath, outpath)
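# Automatically converts the VOC-format annotations produced by labelImg to YOLOv5 format and splits the dataset (a single run completes everything).
# Latest recommended article published on 2024-07-17 15:44:06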