Yolo数据集标注转VOC脚本（包含边界框验证和错误处理功能，可避免生成无效的 XML 文件）-CSDN博客

本文链接：https://blog.csdn.net/weixin_69011755/article/details/147993653

一个增强版的 YOLO 转 VOC 脚本，包含边界框验证和错误处理功能，可避免生成无效的 XML 文件：

import math
import os
import xml.etree.ElementTree as ET

import cv2
from tqdm import tqdm


def yolo_to_xml(yolo_dir, image_dir, output_dir, classes, min_box_size=1.0):
    """
    Convert YOLO-format label files to Pascal VOC XML annotations.

    Every ``.txt`` file in ``yolo_dir`` is paired with an image of the same
    base name in ``image_dir``. Each label line is converted from normalized
    ``class_id x_center y_center width height`` to pixel ``xmin/ymin/xmax/ymax``,
    clamped to the image bounds and validated. One XML file per label file is
    written to ``output_dir``; label files that yield no valid box are skipped.
    Progress, warnings and final statistics are printed to stdout.

    Args:
        yolo_dir: Directory containing the YOLO ``.txt`` label files.
        image_dir: Directory containing the matching images.
        output_dir: Directory the XML files are written to (created if missing).
        classes: Class names, ordered by the class ID used in the label files.
        min_box_size: Minimum box width/height in pixels; smaller boxes are
            dropped.

    Returns:
        None.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Running statistics, reported once the whole directory has been processed.
    total_files = 0
    invalid_files = 0
    total_boxes = 0
    filtered_boxes = 0

    num_classes = len(classes)
    # Extensions tried in order; '.jpg' and '.png' stay first so datasets that
    # already worked keep resolving to the same image.
    image_exts = ('.jpg', '.png', '.jpeg', '.bmp')

    yolo_files = [f for f in os.listdir(yolo_dir) if f.endswith('.txt')]

    for yolo_file in tqdm(yolo_files, desc="转换进度"):
        total_files += 1
        base_name = os.path.splitext(yolo_file)[0]

        # Locate the image that belongs to this label file.
        img_path = None
        for ext in image_exts:
            candidate = os.path.join(image_dir, f"{base_name}{ext}")
            if os.path.exists(candidate):
                img_path = candidate
                break
        if img_path is None:
            print(f"警告: 未找到图像文件 {base_name}")
            invalid_files += 1
            continue

        # The image is loaded only to obtain its dimensions.
        img = cv2.imread(img_path)
        if img is None:
            print(f"错误: 无法读取图像 {img_path}")
            invalid_files += 1
            continue

        height, width = img.shape[:2]
        # Fall back to depth 1 if a 2-D (single-channel) array is ever returned.
        depth = img.shape[2] if img.ndim == 3 else 1

        # Build the XML skeleton: file information and image size.
        annotation = ET.Element('annotation')
        ET.SubElement(annotation, 'folder').text = os.path.basename(image_dir)
        ET.SubElement(annotation, 'filename').text = os.path.basename(img_path)
        ET.SubElement(annotation, 'path').text = img_path

        size = ET.SubElement(annotation, 'size')
        ET.SubElement(size, 'width').text = str(width)
        ET.SubElement(size, 'height').text = str(height)
        ET.SubElement(size, 'depth').text = str(depth)

        ET.SubElement(annotation, 'segmented').text = '0'

        # 'utf-8-sig' strips a leading BOM that would otherwise corrupt the
        # first class ID; undecodable bytes are replaced so that the affected
        # line fails parsing below instead of aborting the whole run.
        label_path = os.path.join(yolo_dir, yolo_file)
        with open(label_path, 'r', encoding='utf-8-sig', errors='replace') as f:
            lines = f.readlines()

        valid_objects = 0
        for raw_line in lines:
            fields = raw_line.strip().split()
            if not fields:
                continue

            # Parse "class_id x_center y_center width height".
            try:
                class_id = int(fields[0])
                x_center = float(fields[1])
                y_center = float(fields[2])
                w = float(fields[3])
                h = float(fields[4])
                # NaN/inf parse as floats but cannot be converted to pixel
                # integers, so they are rejected here as malformed lines.
                if not all(math.isfinite(v) for v in (x_center, y_center, w, h)):
                    raise ValueError("non-finite coordinate")
            except (ValueError, IndexError):
                print(f"错误: 在文件 {yolo_file} 中解析行失败: {fields}")
                continue

            total_boxes += 1

            # An out-of-range ID would raise IndexError on the class lookup,
            # and a negative ID would silently select a class from the end of
            # the list, so both are rejected explicitly.
            if not 0 <= class_id < num_classes:
                filtered_boxes += 1
                print(f"错误: 在文件 {yolo_file} 中发现无效类别 ID {class_id}"
                      f"（有效范围 0-{num_classes - 1}）")
                continue

            # Normalized center/size -> pixel corner coordinates.
            xmin = int((x_center - w / 2) * width)
            ymin = int((y_center - h / 2) * height)
            xmax = int((x_center + w / 2) * width)
            ymax = int((y_center + h / 2) * height)

            # Clamp the corners to the image bounds.
            xmin = max(0, xmin)
            ymin = max(0, ymin)
            xmax = min(width, xmax)
            ymax = min(height, ymax)

            box_width = xmax - xmin
            box_height = ymax - ymin

            if box_width < min_box_size or box_height < min_box_size:
                filtered_boxes += 1
                print(f"过滤小边界框: {base_name}.txt, 类别: {classes[class_id]}, 宽: {box_width}, 高: {box_height}")
                continue

            # Still needed when min_box_size <= 0, where the size filter above
            # lets degenerate boxes through.
            if xmin >= xmax or ymin >= ymax:
                filtered_boxes += 1
                print(f"过滤无效边界框: {base_name}.txt, xmin={xmin}, ymin={ymin}, xmax={xmax}, ymax={ymax}")
                continue

            # Append the object and its bounding box to the annotation.
            obj = ET.SubElement(annotation, 'object')
            ET.SubElement(obj, 'name').text = classes[class_id]
            ET.SubElement(obj, 'pose').text = 'Unspecified'
            ET.SubElement(obj, 'truncated').text = '0'
            ET.SubElement(obj, 'difficult').text = '0'

            bbox = ET.SubElement(obj, 'bndbox')
            ET.SubElement(bbox, 'xmin').text = str(xmin)
            ET.SubElement(bbox, 'ymin').text = str(ymin)
            ET.SubElement(bbox, 'xmax').text = str(xmax)
            ET.SubElement(bbox, 'ymax').text = str(ymax)

            valid_objects += 1

        # Do not write an XML file that would contain no objects.
        if valid_objects == 0:
            print(f"警告: {yolo_file} 不包含有效边界框")
            invalid_files += 1
            continue

        xml_path = os.path.join(output_dir, f"{base_name}.xml")
        tree = ET.ElementTree(annotation)
        tree.write(xml_path, encoding='utf-8', xml_declaration=True)

    # Final statistics.
    print(f"转换完成！共处理 {total_files} 个文件，其中 {invalid_files} 个文件无效")
    print(f"总共 {total_boxes} 个边界框，过滤掉 {filtered_boxes} 个无效/小边界框")
    print(f"有效 XML 文件已保存至: {output_dir}")


# Example usage
if __name__ == "__main__":
    # Class names, listed in the same order as the class IDs in the YOLO labels.
    fruit_classes = [
        'watermelon', 'apple', 'banana', 'grape', 'orange',
        'pear', 'pomegranate', 'nectarine', 'mango', 'lychee',
        'longan', 'durian', 'cantaloupe', 'blueberry',
    ]

    # Run the conversion: label directory, image directory, XML output
    # directory, class list and the minimum box size in pixels.
    yolo_to_xml(
        yolo_dir=r'C:\Users\29420\Desktop\UP\dataset\val\labels',
        image_dir=r'C:\Users\29420\Desktop\UP\dataset\val\images',
        output_dir=r'C:\Users\29420\Desktop\voc\xml',
        classes=fruit_classes,
        min_box_size=1.0,
    )

边界框验证：
- 添加最小尺寸过滤（默认 1x1 像素）
- 检查 xmin < xmax 和 ymin < ymax
- 将超出图像范围的坐标裁剪到图像边界内（xmin、ymin 不小于 0，xmax、ymax 不超过图像的宽和高）
错误处理：
- 检查图像文件是否存在
- 处理无效的 YOLO 标注行
- 跳过不包含有效边界框的文件（不为其生成 XML，并计入无效文件数）
统计信息：
- 输出处理进度和统计数据
- 记录过滤的无效边界框数量