一. 标签文件格式转换
因为使用labelImg标注数据集时默认的输出格式是xml,而YOLOv5需要的标签格式一般是txt,所以需要进行格式转换,下面的代码实测有效:
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
import glob
# Class names to export. A class's position in this list is the numeric id
# written to the txt label; objects whose name is not listed are skipped.
classes = ["person"]
def convert(size, box):
    """Convert a pixel-coordinate bounding box to a normalized center/size box.

    Args:
        size: ``(width, height)`` of the image in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels. Note the order: both x
            values come first, then both y values.

    Returns:
        A tuple ``(x_center, y_center, width, height)`` where the x values are
        divided by the image width and the y values by the image height.

    Raises:
        ZeroDivisionError: If either image dimension in ``size`` is 0.
    """
    # Scale factors that map pixel units to the [0, 1] range.
    dw = 1.0 / size[0]
    dh = 1.0 / size[1]

    x_center = (box[0] + box[1]) / 2.0
    y_center = (box[2] + box[3]) / 2.0
    box_w = box[1] - box[0]
    box_h = box[3] - box[2]

    return (x_center * dw, y_center * dh, box_w * dw, box_h * dh)
# 注意:在Windows系统下,路径里的反斜杠\需要写成\\(或者使用原始字符串 r'...'),否则会被Python当作转义字符处理
def convert_annotation(image_name, xml_dir='D:\\Users\\labels\\',
                       txt_dir='D:\\Users\\labels_txt\\'):
    """Convert one XML annotation file into a txt label file.

    The XML file ``<stem>.xml`` is read from ``xml_dir`` and ``<stem>.txt`` is
    written to ``txt_dir``, where ``<stem>`` is ``image_name`` without its
    extension. One line ``<class_id> <x> <y> <w> <h>`` is written per
    ``object`` element whose name is listed in the module-level ``classes``;
    other object names are printed and skipped.

    Args:
        image_name: File name (any extension) identifying the annotation.
        xml_dir: Directory containing the XML annotation files.
        txt_dir: Existing directory that receives the txt label files.

    Raises:
        OSError: If the XML file cannot be read or the txt file cannot be
            written.
        xml.etree.ElementTree.ParseError: If the XML is malformed.
    """
    # splitext handles extensions of any length (the old ``[:-3]`` slice
    # silently produced wrong names for e.g. ``.jpeg``).
    stem = os.path.splitext(image_name)[0]
    xml_path = os.path.join(xml_dir, stem + '.xml')
    txt_path = os.path.join(txt_dir, stem + '.txt')

    # ET.parse reads the file as bytes and honours the XML encoding
    # declaration, so it does not depend on the platform's default encoding.
    root = ET.parse(xml_path).getroot()

    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    # The context manager guarantees the label file is flushed and closed.
    with open(txt_path, 'w') as out_file:
        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls not in classes:
                # Report the unexpected class name and leave it out.
                print(cls)
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects (xmin, xmax, ymin, ymax).
            b = (
                float(xmlbox.find('xmin').text),
                float(xmlbox.find('xmax').text),
                float(xmlbox.find('ymin').text),
                float(xmlbox.find('ymax').text),
            )
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join(str(a) for a in bb) + '\n')
wd = getcwd()
if __name__ == '__main__':
    # Directory of XML annotations; each XML yields a txt file of the same name.
    xml_dir = 'D:\\Users\\labels\\'
    # Make sure the output directory exists before any label file is written.
    os.makedirs('D:\\Users\\labels_txt\\', exist_ok=True)
    for label_path in sorted(os.listdir(xml_dir)):
        # Skip anything that is not an XML annotation (e.g. classes.txt).
        if not label_path.lower().endswith('.xml'):
            continue
        print(label_path)
        convert_annotation(label_path)
二.自动划分训练集和验证集
主要需要注意的是,因为有些图片没有标签,所以图片和标签不是一一对应的。因此在移动标签文件时,如果出现找不到路径的异常,直接跳过(pass)即可。
import os
import random
import shutil
def moveFile(input1, input2, save1, save2, rate=0.1, seed=1):
    """Move a random subset of images, and their labels, into other folders.

    ``int(file_count * rate)`` files are sampled from ``input1`` and moved to
    ``save1``. For each sampled image, the label ``<stem>.txt`` in ``input2``
    is moved to ``save2`` if it exists; images without a label are still
    moved.

    Args:
        input1: Directory containing the images.
        input2: Directory containing the txt labels.
        save1: Existing directory that receives the sampled images.
        save2: Existing directory that receives the matching labels.
        rate: Fraction of the images to move.
        seed: Seed for the ``random`` module, so the split is repeatable.

    Raises:
        ValueError: If ``rate`` asks for more files than are available or is
            negative.
    """
    # Only regular files are candidates. They are sorted because os.listdir
    # returns entries in arbitrary order, which would defeat the fixed seed.
    candidates = sorted(
        name for name in os.listdir(input1)
        if os.path.isfile(os.path.join(input1, name))
    )

    random.seed(seed)
    picknumber = int(len(candidates) * rate)
    sample = random.sample(candidates, picknumber)
    print(sample)
    print(len(sample))

    # splitext keeps dots inside the name ("a.b.png" -> "a.b").
    stems = [os.path.splitext(name)[0] for name in sample]
    print(stems)

    for file_name, stem in zip(sample, stems):
        # Move the sampled file itself, whatever its extension is.
        shutil.move(os.path.join(input1, file_name), save1)
        try:
            shutil.move(os.path.join(input2, stem + '.txt'), save2)
        except FileNotFoundError:
            # Some images have no label file; that is expected, so skip it.
            pass
if __name__ == '__main__':
    # Source folders holding all images and all txt labels.
    input_path1 = '.\\images\\'
    input_path2 = '.\\labels\\'
    # Destination folders for the validation split.
    save_img = '.\\val\\images\\'
    save_lab = '.\\val\\labels\\'
    # exist_ok avoids the separate existence check.
    os.makedirs(save_lab, exist_ok=True)
    os.makedirs(save_img, exist_ok=True)
    moveFile(input_path1, input_path2, save_img, save_lab)