YOLO格式数据清洗：剔除不需要的类别

最新推荐文章于 2024-08-09 00:38:37 发布

heromps

最新推荐文章于 2024-08-09 00:38:37 发布

阅读量120

点赞数 2

分类专栏： YOLO 文章标签： YOLO python 机器学习

本文链接：https://blog.csdn.net/heromps/article/details/140382748

版权

YOLO 专栏收录该内容

9 篇文章 0 订阅

订阅专栏

下面的脚本在YOLO格式的数据集中只保留指定的类别，并把保留类别的ID重新映射为从0开始的连续编号；随后将仍含有标注的图像复制到新的目录结构中，并在其中写入经过过滤和重新编号的标签文件。

import os
import shutil

# Class configuration: the full set of class IDs found in the source dataset
# and the subset that should survive the filtering step.
original_classes = list(range(80))  # original class IDs (0..79)
retained_classes = [0, 2, 4, 6, 8]  # class IDs to keep

# Map every retained original ID to its new, consecutive ID; the new ID is
# simply the position of the original ID inside retained_classes.
id_mapping = dict(zip(retained_classes, range(len(retained_classes))))

# Dataset splits to process, followed by the absolute paths of the source
# dataset root and of the root that receives the filtered copy.
data_splits = ['train', 'val', 'test']
base_dir = os.path.abspath('ultralytics/datasets/coco128')
output_base_dir = os.path.abspath('ultralytics/datasets/filtered_dataset')

# Create the destination root up front; it is fine if it already exists.
os.makedirs(output_base_dir, exist_ok=True)

# Image extensions probed, in order, when looking for the picture that belongs
# to a label file. '.jpg' comes first so datasets that only contain JPEGs are
# handled exactly as before.
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp')

# For every split: keep only the annotations whose class is in id_mapping,
# rewrite their class IDs, and export the label file together with its image.
for split in data_splits:
    image_dir = os.path.join(base_dir, 'images', split)
    label_dir = os.path.join(base_dir, 'labels', split)

    if not os.path.exists(image_dir) or not os.path.exists(label_dir):
        print(f"Skipping {split} as it does not exist.")
        continue

    output_image_dir = os.path.join(output_base_dir, 'images', split)
    output_label_dir = os.path.join(output_base_dir, 'labels', split)

    os.makedirs(output_image_dir, exist_ok=True)
    os.makedirs(output_label_dir, exist_ok=True)

    # Collect all label files of this split.
    label_files = [f for f in os.listdir(label_dir) if f.endswith('.txt')]

    for label_file in label_files:
        label_path = os.path.join(label_dir, label_file)
        # splitext strips only the trailing '.txt'; str.replace would also
        # rewrite a '.txt' that happens to occur inside the file name.
        stem = os.path.splitext(label_file)[0]

        filtered_lines = []
        with open(label_path, 'r', encoding='utf-8') as f:
            for line_no, line in enumerate(f, start=1):
                parts = line.split()
                if not parts:
                    # Blank lines (e.g. a trailing newline) carry no annotation.
                    continue
                try:
                    class_id = int(parts[0])
                except ValueError:
                    # Not an annotation line (for instance a classes.txt that
                    # lives next to the labels); report it and move on rather
                    # than aborting the whole run.
                    print(f"Warning: {label_path}:{line_no}: invalid class id "
                          f"{parts[0]!r}, line skipped.")
                    continue
                if class_id in id_mapping:
                    parts[0] = str(id_mapping[class_id])
                    filtered_lines.append(" ".join(parts) + "\n")

        if not filtered_lines:
            # Nothing of interest is annotated in this image; do not export it.
            continue

        # Locate the image BEFORE writing anything, so a missing image cannot
        # leave an orphan label file behind in the output directory.
        image_name = next(
            (stem + ext for ext in image_extensions
             if os.path.isfile(os.path.join(image_dir, stem + ext))),
            None,
        )
        if image_name is None:
            print(f"Warning: no image found for {label_path}, label skipped.")
            continue

        # Write the filtered, re-numbered annotations to the new label file.
        new_label_path = os.path.join(output_label_dir, label_file)
        with open(new_label_path, 'w', encoding='utf-8') as f:
            f.writelines(filtered_lines)

        # Copy the matching image next to it, keeping its original extension.
        shutil.copy(os.path.join(image_dir, image_name),
                    os.path.join(output_image_dir, image_name))

print("Dataset filtering and ID remapping completed.")