# 标注数据文件处理

import os
from pathlib import Path
import shutil
import random

"""
复制指定扩展名的文件从源目录到目标目录(包括子目录)。
:param src_dir: 源目录路径
:param txt_dir: 目标标签目录路径
:param img_dir: 目标图片目录路径
:param file_extension: 要复制的文件扩展名,默认为'.txt'
"""
def copy_specific_files(src_dir, txt_dir, img_dir, file_extension='.txt'):
    """Collect label files and image files from a directory tree into two flat folders.

    ``src_dir`` is walked recursively. Every file whose name ends with
    ``file_extension`` is copied into ``txt_dir`` (``classes.txt`` is skipped),
    and every file whose suffix is a known image suffix is copied into
    ``img_dir``. Both target directories are created when they do not exist.

    Files are copied under their base name only, so two files with the same
    name in different sub-directories end up overwriting each other.

    :param src_dir: root of the directory tree to scan
    :param txt_dir: target directory for label files
    :param img_dir: target directory for image files
    :param file_extension: name suffix identifying label files, defaults to '.txt'
    """
    # Suffixes are compared lower-cased and always include the dot, so that
    # "photo.JPG" is recognised while a name such as "notajpg" is not.
    image_suffixes = frozenset((
        '.xbm', '.tif', '.pjp', '.svgz', '.jpg', '.jpeg', '.ico', '.tiff',
        '.gif', '.svg', '.jfif', '.webp', '.png', '.bmp', '.pjpeg', '.avif',
    ))

    os.makedirs(txt_dir, exist_ok=True)
    os.makedirs(img_dir, exist_ok=True)

    for root, _dirs, files in os.walk(src_dir):
        for file in files:
            src_file = os.path.join(root, file)
            if file.endswith(file_extension):
                if file == "classes.txt":
                    # Skip only this file; the other files in the same
                    # directory must still be processed.
                    continue
                shutil.copy2(src_file, os.path.join(txt_dir, file))
            if os.path.splitext(file)[1].lower() in image_suffixes:
                shutil.copy2(src_file, os.path.join(img_dir, file))


"""
按 6:2:2 的比例将图片及其对应标签随机划分,并复制到目标目录下的 train/val/test 子目录。
:param file_path: 源图片目录路径
:param file_path_label: 源标签路径
:param new_file_path: 目标图片路径
:param new_file_path_label: 目标标签路径
"""
def train_test_split(file_path, file_path_label, new_file_path, new_file_path_label,
                     split_rate=(0.6, 0.2, 0.2), seed=None):
    """Randomly split images and their labels into train/val/test sub-directories.

    The regular files in ``file_path`` are shuffled and copied into
    ``new_file_path/train``, ``new_file_path/val`` and ``new_file_path/test``
    according to ``split_rate``. For every image, the file in
    ``file_path_label`` that has the same name without extension is copied
    into the matching sub-directory of ``new_file_path_label``. Images that
    have no such label file are still copied; their number is reported.

    :param file_path: source image directory
    :param file_path_label: source label directory
    :param new_file_path: target image directory
    :param new_file_path_label: target label directory
    :param split_rate: (train, val, test) fractions, must sum to 1
    :param seed: optional seed for a reproducible shuffle; ``None`` uses the
        global ``random`` state
    :raises ValueError: if ``split_rate`` is not three non-negative fractions
        summing to 1
    """
    if (len(split_rate) != 3 or min(split_rate) < 0
            or abs(sum(split_rate) - 1.0) > 1e-6):
        raise ValueError(
            "split_rate must be three non-negative fractions summing to 1, got %r"
            % (split_rate,))

    split_names = ('train', 'val', 'test')
    for split_name in split_names:
        os.makedirs(os.path.join(new_file_path, split_name), exist_ok=True)
        os.makedirs(os.path.join(new_file_path_label, split_name), exist_ok=True)

    # Sorted before shuffling so that a given seed always yields the same split,
    # independent of the order in which the OS lists the directory.
    images = sorted(
        name for name in os.listdir(file_path)
        if os.path.isfile(os.path.join(file_path, name))
    )
    # Labels are looked up by stem instead of by list position, so extra files
    # (e.g. classes.txt) or missing labels cannot shift the pairing.
    # NOTE(review): if several label files share a stem, the last one listed wins.
    labels_by_stem = {
        Path(name).stem: name
        for name in os.listdir(file_path_label)
        if os.path.isfile(os.path.join(file_path_label, name))
    }

    rng = random.Random(seed) if seed is not None else random
    rng.shuffle(images)

    total = len(images)
    train_end = round(total * split_rate[0])
    val_end = round(total * (split_rate[0] + split_rate[1]))

    counts = dict.fromkeys(split_names, 0)
    missing_labels = 0
    for position, image_name in enumerate(images):
        if position < train_end:
            split_name = 'train'
        elif position < val_end:
            split_name = 'val'
        else:
            split_name = 'test'

        shutil.copy2(os.path.join(file_path, image_name),
                     os.path.join(new_file_path, split_name))
        label_name = labels_by_stem.get(Path(image_name).stem)
        if label_name is None:
            missing_labels += 1
        else:
            shutil.copy2(os.path.join(file_path_label, label_name),
                         os.path.join(new_file_path_label, split_name))
        counts[split_name] += 1

    if missing_labels:
        print("Warning: %d image(s) had no matching label file" % missing_labels)
    print("Done!", counts['train'], counts['val'], counts['test'])


if __name__ == '__main__':
    # Shorten file names under the dataset root: every occurrence of the long
    # intersection prefix in a file name is replaced by "1".
    dataset_root = Path("D:/work/yolov5/datasets/2024/")
    for folder, _subdirs, names in os.walk(dataset_root):
        for name in names:
            old_path = os.path.join(folder, name)
            renamed = name.replace("龙华五岔路口五个方向", "1")
            # Log "<old full path>---<new file name>" before moving.
            print(f"{old_path}---{renamed}")
            shutil.move(old_path, os.path.join(folder, renamed))

# if __name__ == '__main__':
#     # 源文件夹路径
#     file_path = r"D:\work\yolov5\my_data\images"
#     file_path_label = r"D:\work\yolov5\my_data\labels"
#     # 新文件路径
#     new_file_path = r"D:\work\yolov5\my_data\2024\img"
#     new_file_path_label = r"D:\work\yolov5\my_data\2024\label"
#     train_test_split(file_path, file_path_label, new_file_path, new_file_path_label)

# 将团队标注好的数据,按比例复制到对应目录

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值