# VOC to YOLO: convert Pascal VOC XML annotations into YOLO-format label files
import xml.etree.ElementTree as ET
import os
# Directory containing the Pascal VOC XML annotation files.
# Raw strings keep the Windows backslashes literal; the previous non-raw
# literals relied on invalid escape sequences (e.g. "\P"), which newer Python
# versions warn about. The resulting path values are unchanged.
xml_dir = r"D:\PyCharm Community Edition 2023.2.1\project\gu\yolov7-main\mydata/voc\Annotations"
# Directory where the YOLO-format label files are written.
yolo_dir = r"D:\PyCharm Community Edition 2023.2.1\project\gu\yolov7-main\mydata/voc/txt"
# Mapping from class name to numeric class id.
class_map = {"motor": 0, "person": 1, "car": 2}

# Make sure the output directory exists before any label file is written.
os.makedirs(yolo_dir, exist_ok=True)

# Walk every file in the XML directory.
for xml_file in os.listdir(xml_dir):
    if not xml_file.endswith(".xml"):
        continue
    # Parse the XML file.
    tree = ET.parse(os.path.join(xml_dir, xml_file))
    root = tree.getroot()
    # Image size, used to normalise the box coordinates.
    size = root.find("size")
    width = int(size.find("width").text)
    height = int(size.find("height").text)
    if width <= 0 or height <= 0:
        # Normalising by a zero size would raise ZeroDivisionError; skip the
        # file and report it instead of aborting the whole conversion.
        print(f"skip {xml_file}: invalid image size {width}x{height}")
        continue
    # Collect one YOLO line per object whose class is listed in class_map.
    lines = []
    for obj in root.iter("object"):
        cls_name = obj.find("name").text
        if cls_name not in class_map:
            continue
        cls_id = class_map[cls_name]
        bbox = obj.find("bndbox")
        xmin = float(bbox.find("xmin").text)
        ymin = float(bbox.find("ymin").text)
        xmax = float(bbox.find("xmax").text)
        ymax = float(bbox.find("ymax").text)
        # Normalised box centre (x, y) and box size (w, h).
        x = (xmin + xmax) / (2 * width)
        y = (ymin + ymax) / (2 * height)
        w = (xmax - xmin) / width
        h = (ymax - ymin) / height
        lines.append(f"{cls_id} {x:.6f} {y:.6f} {w:.6f} {h:.6f}\n")
    if not lines:
        # No object of a known class: no label file is produced for this XML.
        continue
    # Write the label file once per XML in "w" mode, so running the script
    # again replaces the labels instead of appending duplicate lines.
    yolo_file = os.path.splitext(xml_file)[0] + ".txt"
    with open(os.path.join(yolo_dir, yolo_file), "w") as f:
        f.writelines(lines)
# Randomly split the dataset into train / val / test subsets
import os
import shutil
import random
# Seed the random number generator so the random results are reproducible.
random.seed(0)
# def mk_dir(file_path):
#     if os.path.exists(file_path):
#         # If the folder already exists, delete it first and then recreate it.
#         shutil.rmtree(file_path)
#     os.makedirs(file_path)
def _copy_subset(images, image_dir, label_dir, out_root, subset):
    """Copy one subset's images and matching label files under out_root/subset."""
    image_out = os.path.join(out_root, subset, 'images')
    label_out = os.path.join(out_root, subset, 'labels')
    # Always create both folders, even for an empty subset, so the output
    # layout is complete and nothing later fails on a missing directory.
    os.makedirs(image_out, exist_ok=True)
    os.makedirs(label_out, exist_ok=True)
    for image in images:
        shutil.copy(os.path.join(image_dir, image),
                    os.path.join(image_out, image))
        # splitext copes with extensions of any length (".jpeg", ".png", ...),
        # unlike slicing off the last three characters.
        label_name = os.path.splitext(image)[0] + '.txt'
        src_label = os.path.join(label_dir, label_name)
        dst_label = os.path.join(label_out, label_name)
        if os.path.exists(src_label):
            shutil.copy(src_label, dst_label)
        else:
            # An image without a label file gets an empty label in the output.
            # The source label directory is left untouched.
            with open(dst_label, 'w'):
                pass


def split_data(file_path, new_file_path, train_rate, val_rate, test_rate,
               label_dir=None):
    """Randomly split an image folder and its txt labels into train/val/test.

    The result is laid out as ``new_file_path/<subset>/images`` and
    ``new_file_path/<subset>/labels`` for the subsets ``train``, ``val`` and
    ``test``.

    Args:
        file_path: Directory containing the source images.
        new_file_path: Root directory that receives the three subsets.
        train_rate: Fraction of the images assigned to the training subset.
        val_rate: Fraction of the images assigned to the validation subset.
        test_rate: Not used in the computation; the test subset is whatever
            remains after the train and val slices are taken.
        label_dir: Directory containing the ``.txt`` label files. When
            omitted, the module-level ``xmlpath`` variable is used, which is
            what earlier versions of this function always did.

    Raises:
        NameError: If ``label_dir`` is omitted and no module-level ``xmlpath``
            is defined.
    """
    if label_dir is None:
        # Backward compatibility: callers used to provide the label directory
        # through the module-level global instead of an argument.
        label_dir = xmlpath

    # os.listdir returns entries in arbitrary order, so sort before shuffling;
    # otherwise seeding the RNG does not make the split reproducible.
    # Sub-directories are ignored because only regular files can be copied.
    images = sorted(
        name for name in os.listdir(file_path)
        if os.path.isfile(os.path.join(file_path, name))
    )
    random.shuffle(images)

    total = len(images)
    train_end = int(train_rate * total)
    val_end = int((train_rate + val_rate) * total)
    # Slices are half-open: [0, train_end), [train_end, val_end), [val_end, total).
    subsets = (
        ('train', images[:train_end]),
        ('val', images[train_end:val_end]),
        ('test', images[val_end:]),
    )
    for subset, names in subsets:
        _copy_subset(names, file_path, label_dir, new_file_path, subset)
    print('ok')
if __name__ == '__main__':
    # Raw strings keep the Windows backslashes literal; the previous non-raw
    # literals relied on invalid escape sequences, which newer Python versions
    # warn about. The resulting path values are unchanged.
    # Directory containing the source images.
    file_path = r"D:\PyCharm Community Edition 2023.2.1\project\gu\yolov7-main\mydata/voc\JPGImages"
    # Directory containing the txt label files. split_data picks this
    # module-level name up as the label source directory.
    xmlpath = r'D:\PyCharm Community Edition 2023.2.1\project\gu\yolov7-main\mydata/voc/txt'
    # Root directory that receives the train/val/test subsets.
    new_file_path = r"D:\PyCharm Community Edition 2023.2.1\project\gu\yolov7-main\mydata/voc\labels"
    split_data(file_path, new_file_path, train_rate=0.8, val_rate=0.1, test_rate=0.1)