python实现VOC格式(xml)标签批量转换为yolo格式(txt)标签

最新推荐文章于 2025-05-13 15:28:26 发布

onnx

最新推荐文章于 2025-05-13 15:28:26 发布

阅读量3.1k

点赞数 5

分类专栏：目标识别检测 python语言数据处理文章标签： python xml 深度学习人工智能

本文链接：https://blog.csdn.net/DeepLearning_/article/details/127625153

版权

目标识别检测同时被 3 个专栏收录

33 篇文章

订阅专栏

python语言

16 篇文章

订阅专栏

数据处理

3 篇文章

订阅专栏

在这里插入图片描述

文章目录

前言
一、VOC格式标签
二、YOLO格式标签
三、voc_to_yolo.py转换脚本
四、完整代码如下
五、总结

前言

我们常见的目标检测算法使用的数据集对应标签格式多为四种，分别是voc格式、yolo格式、json格式、coco格式。本文将分享自己使用的voc转yolo格式的脚本文件，批量转换，非常高效，后续将继续分享yolo格式转voc格式脚本、voc转json格式脚本等，如果本文对你有帮助，谢谢阅览点赞收藏！

一、VOC格式标签

在转换voc格式标签之前，我们先了解下voc格式标签是什么样的，有哪些特点。经典的开源数据集VOC2007、VOC2012、VOC2017的标签格式均为voc格式，即标签文件为xml文件，xml文件包含的主要信息如下图所示：
在这里插入图片描述

二、YOLO格式标签

上面我们了解了VOC格式标签的基本样子和主要包含的信息，下面介绍一下yolo格式标签，重点讲下txt文件中每行数字代表什么意思。如下图所示：
在这里插入图片描述

三、voc_to_yolo.py转换脚本

1.引入库

代码如下（示例）：

import os
import cv2
import pickle
import xml.etree.ElementTree as ET
from os.path import join
from skimage import io
from os import listdir, getcwd
from PIL import Image

2.定义转换逻辑

代码如下（示例）：

def convert(size, box):
    """Turn a pixel-space box into image-relative centre/extent values.

    Args:
        size: Indexable whose items 0 and 1 divide the x and y quantities
            respectively (the caller passes the image ``(width, height)``).
        box: Indexable ``(x_low, x_high, y_low, y_high)``.

    Returns:
        Tuple ``(x, y, w, h)``: box centre and box extent, each divided by
        the matching ``size`` entry. No clamping is applied.
    """
    x_low, x_high = box[0], box[1]
    y_low, y_high = box[2], box[3]

    mid_x = (x_low + x_high) / 2.0
    mid_y = (y_low + y_high) / 2.0

    return (
        mid_x / size[0],
        mid_y / size[1],
        (x_high - x_low) / size[0],
        (y_high - y_low) / size[1],
    )

3.实现转换函数

def convert_annotation(xml_files_path, save_txt_files_path, classes, image_dir=None):
    """Convert a semicolon-separated annotation list into YOLO label files.

    Despite the parameter name, ``xml_files_path`` is read as a plain text
    file with one box per line in the form
    ``image_name;xmin;ymin;xmax;ymax;class_name``.

    One ``<image stem>.txt`` file is written per image, containing one
    ``class_id x y w h`` line for every box of that image. Boxes are grouped
    before writing so that an image with several boxes keeps all of them
    instead of only the last one.

    Args:
        xml_files_path: Path of the annotation list file.
        save_txt_files_path: Directory that receives the label files; it is
            created when missing.
        classes: Sequence of class names; a name's index is its class id.
        image_dir: Directory holding the images named in the list. Defaults
            to the directory containing ``xml_files_path``.

    Lines with fewer than six fields and lines whose class name is not in
    ``classes`` are skipped.
    """
    if image_dir is None:
        image_dir = os.path.dirname(xml_files_path)
    os.makedirs(save_txt_files_path, exist_ok=True)

    image_sizes = {}      # image name -> (width, height), read once per image
    labels_by_image = {}  # image name -> label lines, in file order

    with open(xml_files_path, 'r') as gt_file:
        for raw_line in gt_file:
            fields = raw_line.strip().split(";")
            if len(fields) < 6:
                # Blank or malformed line: nothing to convert.
                continue

            image_name, class_name = fields[0], fields[5]
            if class_name not in classes:
                continue
            class_id = classes.index(class_name)

            if image_name not in image_sizes:
                # Only the dimensions are needed, so release the file at once.
                with Image.open(os.path.join(image_dir, image_name)) as img:
                    image_sizes[image_name] = img.size

            # The list stores xmin;ymin;xmax;ymax, while convert() expects
            # (xmin, xmax, ymin, ymax).
            box = (float(fields[1]), float(fields[3]),
                   float(fields[2]), float(fields[4]))
            yolo_box = convert(image_sizes[image_name], box)
            labels_by_image.setdefault(image_name, []).append(
                str(class_id) + " " + " ".join(str(v) for v in yolo_box) + '\n'
            )

    for image_name, label_lines in labels_by_image.items():
        # splitext drops only the final extension, so dotted names stay unique.
        stem = os.path.splitext(image_name)[0]
        out_txt_path = os.path.join(save_txt_files_path, stem + '.txt')
        with open(out_txt_path, 'w') as out_file:
            out_file.writelines(label_lines)

4.实现入口主函数

if __name__ == "__main__":
    # Class names. Their order matches the class ids written to the label
    # files (warning=0, prohibitory=1, mandatory=2), so keep the list exactly
    # in step with the names used in the annotation file.
    classes1 = ['warning','prohibitory','mandatory']
    # Annotation list file (semicolon-separated text, one box per line).
    # The extension is required: the file on disk is GroundT.txt.
    GT_files1 = r'F:\CCTSDB_one\GroundTruth\GroundT.txt'
    # Directory that receives the generated YOLO-format .txt label files.
    save_txt_files1 = r'F:\CCTSDB_one\GroundTruth\yolo_txt'

    convert_annotation(GT_files1, save_txt_files1, classes1)

四、完整代码如下

"""
voc格式xml转化为yolo中的txt
"""

import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
from PIL import Image
import cv2
from skimage import io

def convert(size, box):
    """Normalise a box given in pixels to YOLO ``(x, y, w, h)`` form.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` of the box.

    Returns:
        ``(x, y, w, h)`` where ``x``/``y`` are the box centre and ``w``/``h``
        the box extent, each divided by the image width or height. The
        values are returned as computed, without clamping.
    """
    xmin, xmax, ymin, ymax = box[0], box[1], box[2], box[3]

    centre_x = (xmin + xmax) / 2.0
    centre_y = (ymin + ymax) / 2.0

    rel_x = centre_x / size[0]
    rel_y = centre_y / size[1]
    rel_w = (xmax - xmin) / size[0]
    rel_h = (ymax - ymin) / size[1]
    return (rel_x, rel_y, rel_w, rel_h)



def convert_annotation(xml_files_path, save_txt_files_path, classes, image_dir=None):
    """Convert a semicolon-separated annotation list into YOLO label files.

    Despite the parameter name, ``xml_files_path`` is read as a plain text
    file with one box per line in the form
    ``image_name;xmin;ymin;xmax;ymax;class_name``.

    One ``<image stem>.txt`` file is written per image, containing one
    ``class_id x y w h`` line for every box of that image. Boxes are grouped
    before writing so that an image with several boxes keeps all of them
    instead of only the last one.

    Args:
        xml_files_path: Path of the annotation list file.
        save_txt_files_path: Directory that receives the label files; it is
            created when missing.
        classes: Sequence of class names; a name's index is its class id.
        image_dir: Directory holding the images named in the list. Defaults
            to the directory containing ``xml_files_path``.

    Lines with fewer than six fields and lines whose class name is not in
    ``classes`` are skipped.
    """
    if image_dir is None:
        image_dir = os.path.dirname(xml_files_path)
    os.makedirs(save_txt_files_path, exist_ok=True)

    image_sizes = {}      # image name -> (width, height), read once per image
    labels_by_image = {}  # image name -> label lines, in file order

    with open(xml_files_path, 'r') as gt_file:
        for raw_line in gt_file:
            fields = raw_line.strip().split(";")
            if len(fields) < 6:
                # Blank or malformed line: nothing to convert.
                continue

            image_name, class_name = fields[0], fields[5]
            if class_name not in classes:
                continue
            class_id = classes.index(class_name)

            if image_name not in image_sizes:
                # Only the dimensions are needed, so release the file at once.
                with Image.open(os.path.join(image_dir, image_name)) as img:
                    image_sizes[image_name] = img.size

            # The list stores xmin;ymin;xmax;ymax, while convert() expects
            # (xmin, xmax, ymin, ymax).
            box = (float(fields[1]), float(fields[3]),
                   float(fields[2]), float(fields[4]))
            yolo_box = convert(image_sizes[image_name], box)
            labels_by_image.setdefault(image_name, []).append(
                str(class_id) + " " + " ".join(str(v) for v in yolo_box) + '\n'
            )

    for image_name, label_lines in labels_by_image.items():
        # splitext drops only the final extension, so dotted names stay unique.
        stem = os.path.splitext(image_name)[0]
        out_txt_path = os.path.join(save_txt_files_path, stem + '.txt')
        with open(out_txt_path, 'w') as out_file:
            out_file.writelines(label_lines)
            
            
    
# NOTE: Everything between the triple quotes below is a bare module-level
# string, so none of it is executed. It holds an XML-parsing variant of the
# conversion loop: each file is parsed with ElementTree, the image size is
# read from <size>, and every <object> whose name is in `classes` and whose
# <difficult> flag is not 1 has its <bndbox> passed to convert(). The names
# `xml_files`, `xml_files_path`, `save_txt_files_path` and `classes` are not
# defined at this scope, so the text cannot run as-is if un-quoted here.
"""       
    for xml_name in xml_files:
        print(xml_name)
        xml_file = os.path.join(xml_files_path, xml_name)
        out_txt_path = os.path.join(save_txt_files_path, xml_name.split('.')[0] + '.txt')
        out_txt_f = open(out_txt_path, 'w')
        tree=ET.parse(xml_file)
        root = tree.getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        for obj in root.iter('object'):
            difficult = obj.find('difficult').text
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            # b=(xmin, xmax, ymin, ymax)
            print(w, h, b)
            bb = convert((w,h), b)
            out_txt_f.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
"""

if __name__ == "__main__":
    # Script entry point: convert the annotation list into per-image label files.

    # Directory that receives the generated YOLO-format .txt label files.
    output_dir = r'F:\CCTSDB_one\GroundTruth\yolo_txt'

    # Annotation list to convert (a text file, passed as the first argument).
    annotation_file = r'F:\CCTSDB_one\GroundTruth\GroundT.txt'

    # Class names handed to the converter.
    label_names = ['warning','prohibitory','mandatory']

    convert_annotation(annotation_file, output_dir, label_names)