# Dataset format conversion script 2: VOC_xml2yolo_txt
import re
import os
from tqdm import tqdm
# 2022-08-17: added the category_id (object class) field.
# Converts VOC XML label files into YOLO txt label files.
# 1. Edit objfiles_name to select the dataset directory to process.
objfiles_name = "class-mini-2208/"
# objfiles_name = "catdog/"
# objfiles_name ends with '/', so the joins below produce exactly the same
# strings as plain concatenation would.
mainpath = os.path.join('./dataset/coco2yolo/', objfiles_name)
xml_dir = os.path.join(mainpath, 'annotations_VOC')
lables_dir = os.path.join(mainpath, 'labels')
dataset_lists = ['val2017', 'train2017']
# Convert the XML files in a given directory into the txt files YOLO expects.
def to_one(category_id, name_list, xmin, ymin, xmax, ymax, width, height, name):
    """Write the YOLO label file for one image.

    Each bounding box becomes one line of the form
    ``<class> <x_center> <y_center> <box_width> <box_height>``, where the
    four geometry values are divided by the image width/height.

    Args:
        category_id: Class ids as strings. When there is exactly one id per
            box, each box gets its own id; otherwise the first id is applied
            to every box.
        name_list: Object names; accepted for interface compatibility but
            not used.
        xmin, ymin, xmax, ymax: Parallel sequences of box corner
            coordinates (strings or numbers).
        width, height: Sequences whose first element is the image width /
            height.
        name: Output path without extension; the file written is
            ``"<name>.txt"`` (relative to the current working directory
            when ``name`` is relative).

    Raises:
        IndexError: If boxes are present but ``category_id``, ``width`` or
            ``height`` is empty.
    """
    boxes = list(zip(xmin, ymin, xmax, ymax))
    lines = []
    if boxes:
        # The image size is the same for every box, so parse it only once.
        img_w = float(width[0])
        img_h = float(height[0])

        if len(category_id) == len(boxes):
            # One id per object: label each box with its own class.
            class_ids = [int(c) for c in category_id]
        else:
            # Counts do not line up; fall back to a single class for all.
            class_ids = [int(category_id[0])] * len(boxes)

        for cls, (x1, y1, x2, y2) in zip(class_ids, boxes):
            x1 = float(x1)
            y1 = float(y1)
            x2 = float(x2)
            y2 = float(y2)
            x_center = ((x2 - x1) / 2 + x1) / img_w
            y_center = ((y2 - y1) / 2 + y1) / img_h
            box_w = (x2 - x1) / img_w
            box_h = (y2 - y1) / img_h
            fields = [str(cls), str(x_center), str(y_center),
                      str(box_w), str(box_h)]
            lines.append(' '.join(fields) + '\n')

    # An image without objects still gets an (empty) label file.
    fname = "%s.txt" % name
    with open(fname, 'w') as f:
        f.writelines(lines)
def xml_to_yolo(path):
    """Convert every ``.xml`` file directly inside ``path`` to a YOLO label.

    The tags ``category_id``, ``name``, ``xmin``, ``ymin``, ``xmax``,
    ``ymax``, ``width`` and ``height`` are pulled out of each file with
    regular expressions and handed to ``to_one``, which writes
    ``"<stem>.txt"`` (``stem`` being the XML file name without extension)
    relative to the current working directory.

    Args:
        path: Directory containing the XML annotation files.
    """
    # Skip anything that is not an XML file (sub-directories, stray files).
    xml_names = [entry for entry in os.listdir(path)
                 if entry.lower().endswith('.xml')]

    fCnt = 0
    for entry in tqdm(xml_names):
        file = os.path.join(path, entry)
        # Decode explicitly so the result does not depend on the platform's
        # locale; undecodable bytes are replaced instead of aborting the run.
        with open(file, 'r', encoding='utf-8', errors='replace') as f:
            data = f.read()

        category_id = re.findall('<category_id>(.*?)</category_id>', data)
        name_list = re.findall('<name>(.*?)</name>', data)
        xmin = re.findall('<xmin>(.*?)</xmin>', data)
        ymin = re.findall('<ymin>(.*?)</ymin>', data)
        xmax = re.findall('<xmax>(.*?)</xmax>', data)
        ymax = re.findall('<ymax>(.*?)</ymax>', data)
        width = re.findall('<width>(.*?)</width>', data)
        height = re.findall('<height>(.*?)</height>', data)

        # Output stem, e.g. "000000000139.xml" -> "000000000139". Derived
        # from the file name itself, so it works at any directory depth and
        # with any path separator.
        name = os.path.splitext(entry)[0]

        to_one(category_id, name_list, xmin, ymin, xmax, ymax, width, height, name)
        fCnt += 1

    print('f Num Sum:', fCnt)
def make_dir(lables_dir):
    """Ensure ``lables_dir`` exists, then make it the working directory.

    Args:
        lables_dir: Directory to create (including missing parents) and
            change into.
    """
    # exist_ok=True avoids the race between checking for the directory and
    # creating it.
    os.makedirs(lables_dir, exist_ok=True)
    os.chdir(lables_dir)
if __name__ == '__main__':
    # make_dir() changes the working directory, so the configured relative
    # paths are resolved to absolute ones once, before the first chdir.
    # Otherwise they would stop pointing at the dataset after the first
    # dataset has been processed.
    xml_root = os.path.abspath(xml_dir)
    lables_root = os.path.abspath(lables_dir)

    for dataset in dataset_lists:
        xml_path = os.path.join(xml_root, dataset)
        lables_path = os.path.join(lables_root, dataset)
        print(xml_path)
        print(lables_path)
        # Label files are written relative to the working directory, so
        # switch into the output directory before converting.
        make_dir(lables_path)
        xml_to_yolo(xml_path)
        print('完成:', dataset)
    print('exit')