目标检测voc转yolo格式

最新推荐文章于 2024-08-05 11:19:50 发布

何顾长安

最新推荐文章于 2024-08-05 11:19:50 发布

阅读量725

点赞数

分类专栏：学习记录文章标签： pytorch python 深度学习

本文链接：https://blog.csdn.net/heguu/article/details/119347931

版权

学习记录专栏收录该内容

15 篇文章 0 订阅

订阅专栏

yolo数据格式

每一行表示一个物体，每个物体以（类别 x y w h）的形式表示；类别编号从0开始，所有坐标值都是相对于图像宽高的相对值，取值范围为0~1。

类别，物体中心x坐标，物体中心y坐标，宽度w，高度h
0 0.546242 0.231124 0.526212 0.734124

在这里插入图片描述

把voc 目标框的坐标信息（xmin，xmax，ymin，ymax）提取出来转换成yolo格式（x,y,width,height），并且重新保存。

# todo
#  把voc数据集标注（xml）信息转换成yolo标注格式(txt）,并将对应图像文件复制到对应文件夹
#  根据voc的json文件，生成对应的names标签
import os
from lxml import etree
import json
import shutil
from tqdm import tqdm

# Root of the VOC devkit and the dataset release to convert.
voc_root = r'D:\pycharm_project\practice\VOCdevkit'
voc_version = 'VOC2012'

# Split lists (one image name per line) stored under ImageSets/Main.
train_txt = 'train.txt'
val_txt = "val.txt"

# Location of the JSON file with the class names (loaded in main()).
json_path = r'D:\pycharm_project\practice\VOCdevkit\VOC2012\pascal_voc_classes.json'

# Directory that receives the converted dataset.
save_file_root = r'D:\pycharm_project\practice\yolov3-spp\my_yolo_dataset'

# Input locations derived from the settings above.
voc_images_path = os.path.join(voc_root, voc_version, "JPEGImages")  # image files
voc_xml_path = os.path.join(voc_root, voc_version, "Annotations")  # XML annotations
train_txt_path = os.path.join(voc_root, voc_version, 'ImageSets', 'Main', train_txt)  # training split list
val_txt_path = os.path.join(voc_root, voc_version, 'ImageSets', 'Main', val_txt)  # validation split list

# Make sure the output root exists before anything is written into it.
if not os.path.exists(save_file_root):
    os.makedirs(save_file_root)

# Fail early when any of the required inputs is missing.
assert os.path.exists(voc_images_path), "voc_images_path not exist"
assert os.path.exists(voc_xml_path), "voc_xml_path not exist"
assert os.path.exists(train_txt_path), "train_txt_path not exist"
assert os.path.exists(val_txt_path), "val_txt_path not exist"



def xml_to_dict(xml):
    """
    Recursively convert an XML element into nested dictionaries.

    An element without children yields its text. Any other element yields a
    dict keyed by child tag; children tagged ``object`` are collected in a
    list so that repeated objects are all kept instead of the last one
    overwriting the earlier ones.

    :param xml: parsed XML element
    :return: the element's text for a leaf, otherwise a dict of its children
    """
    # A leaf has no child elements: hand back its text content.
    if not len(xml):
        return xml.text

    parsed = {}
    for node in xml:
        value = xml_to_dict(node)
        if node.tag == 'object':
            # Several objects may share this tag, so accumulate them.
            parsed.setdefault(node.tag, []).append(value)
        else:
            # Any other tag maps straight to its converted content.
            parsed[node.tag] = value
    return parsed


def trans_info(file_names, classes_dict, save_path, train_or_val="train"):
    """
    Convert VOC XML annotations to YOLO label files and copy the images.

    For every name in ``file_names`` the annotation ``<name>.xml`` is read from
    the module-level ``voc_xml_path``, one ``class x_center y_center w h`` line
    per object is written to ``<save_path>/<train_or_val>/<name>.txt``, and
    ``<name>.jpg`` is copied from the module-level ``voc_images_path`` into
    ``<save_path>/Image``. Coordinates are divided by the image size stored in
    the annotation and rounded to 6 decimals. An annotation without any
    ``object`` entry produces an empty label file.

    :param file_names: list: file names without extension; blank entries are skipped
    :param classes_dict: dict mapping a class name to its 1-based VOC id
    :param save_path: root directory of the converted dataset
    :param train_or_val: name of the label sub-directory ("train" or "val")
    :return: None
    """
    label_dir = os.path.join(save_path, train_or_val)
    image_dir = os.path.join(save_path, 'Image')
    # The output directories do not depend on the file, so create them once.
    os.makedirs(label_dir, exist_ok=True)
    os.makedirs(image_dir, exist_ok=True)

    for file_name in tqdm(file_names, desc="转换{}文件中".format(train_or_val)):
        # Strip first so stray whitespace around a name never ends up in a path.
        file_name = file_name.strip()
        if file_name == '':
            continue

        # Read raw bytes and let the XML parser handle the decoding: text mode
        # would decode with the platform default encoding, and lxml rejects
        # str input that carries an encoding declaration.
        with open(os.path.join(voc_xml_path, file_name + '.xml'), 'rb') as f:
            xml = etree.fromstring(f.read())
        annotations_info = xml_to_dict(xml)

        # Image size as floats so the divisions below give relative values.
        img_height = float(annotations_info['size']['height'])
        img_width = float(annotations_info['size']['width'])

        label_lines = []
        # .get(): an image without objects has no 'object' key at all.
        for obj in annotations_info.get('object', []):
            xmin = float(obj['bndbox']['xmin'])
            xmax = float(obj['bndbox']['xmax'])
            ymin = float(obj['bndbox']['ymin'])
            ymax = float(obj['bndbox']['ymax'])
            # YOLO class ids start at 0 while the VOC ids start at 1.
            index = classes_dict[obj['name']] - 1

            # Corner box -> centre/size box, still in pixels.
            x_center = xmin + (xmax - xmin) / 2
            y_center = ymin + (ymax - ymin) / 2
            w = xmax - xmin
            h = ymax - ymin

            # Normalise by the image size, keeping 6 decimals.
            x_center = round(x_center / img_width, 6)
            y_center = round(y_center / img_height, 6)
            w = round(w / img_width, 6)
            h = round(h / img_height, 6)

            label_lines.append(' '.join(str(j) for j in [index, x_center, y_center, w, h]))

        # One object per line, space separated, no trailing newline.
        with open(os.path.join(label_dir, file_name + '.txt'), 'w') as f:
            f.write('\n'.join(label_lines))

        # Copy the matching image next to the labels.
        shutil.copyfile(os.path.join(voc_images_path, file_name + '.jpg'),
                        os.path.join(image_dir, file_name + '.jpg'))


def create_class_names(class_dict: dict, save_root=None):
    """
    Write the class names to ``my_data_label.names``, one name per line.

    Names are written in ascending order of their id (the dict value), so that
    line ``i`` of the file is the class whose label index is ``id - 1`` in the
    converted annotations, regardless of the key order in the source JSON.
    The file has no trailing newline.

    :param class_dict: dict mapping a class name to its numeric id
    :param save_root: directory to write into; defaults to the module-level
        ``save_file_root`` when omitted
    :return: None
    """
    if save_root is None:
        save_root = save_file_root
    # sorted() is stable, so names with equal ids keep their original order.
    names = sorted(class_dict, key=class_dict.get)
    with open(os.path.join(save_root, 'my_data_label.names'), "w") as w:
        w.write("\n".join(names))

def main():
    """
    Run the whole VOC -> YOLO conversion.

    Copies the train/val split lists into ``save_file_root``, loads the class
    dictionary from ``json_path``, converts the annotations of both splits
    with ``trans_info`` and finally writes the class-name file with
    ``create_class_names``.

    :return: None
    """
    # Keep a copy of the split lists next to the converted data.
    shutil.copyfile(train_txt_path, os.path.join(save_file_root, 'train.txt'))
    shutil.copyfile(val_txt_path, os.path.join(save_file_root, 'val.txt'))

    # Load the class dictionary; the context manager closes the file handle.
    with open(json_path, 'r') as f:
        class_dict = json.load(f)

    # Both splits are handled the same way; only the list file and the name
    # of the output sub-directory differ.
    for split_name, list_path in (("train", train_txt_path), ("val", val_txt_path)):
        with open(list_path) as f:
            file_names = f.read().split('\n')
        print(file_names)
        trans_info(file_names, class_dict, save_file_root, split_name)

    # Write the class-name file from the loaded dictionary.
    create_class_names(class_dict)


if __name__ == '__main__':
    main()