yolov5笔记之一键分割训练集和验证集

最新推荐文章于 2024-08-09 22:06:16 发布

光明有我16620122910

最新推荐文章于 2024-08-09 22:06:16 发布

阅读量608

点赞数 1

分类专栏： python

本文链接：https://blog.csdn.net/u011619323/article/details/128502988

版权

深度学习人工智能 python

python 专栏收录该内容

54 篇文章 0 订阅

订阅专栏

import glob
import math
import os
import shutil
# 根据label_from找对应的image,label,
# images  labels
import time

# Image file extension of the samples; change the index to select ".png".
IMAGE_FORMAT = (".jpg", ".png")[0]
# Fraction of the samples assigned to the training set; the remainder goes
# to the validation set. Train/val ratios of 7:3 or 8:2 are the usual choice.
split_rate = 0.8
# Source folders: where labelimg saved the label files and the image files.
label_from = r"D:\Dataset\Annotation_temp"
image_from = r"D:\Dataset\Images_temp"
# Destination folder. The current Unix time, rounded to whole seconds, is
# appended to the base name as a suffix.
target_dir = "{}_{}".format(r"D:\Dataset\QQ_SafeVerify", round(time.time()))


def mk_directory(target_dir):
    '''
    Create the dataset directory tree and return the sample folders.

    The following layout is created under ``target_dir`` (missing parent
    folders are created as well, and folders that already exist are left
    untouched):

        train/images, train/labels
        val/images,   val/labels
        test

    :param target_dir: root folder of the dataset to create.
    :return: dict mapping ``"train_images"``, ``"train_labels"``,
        ``"val_images"`` and ``"val_labels"`` to the matching folder path.
    :raises OSError: if a folder cannot be created (for example because a
        regular file already occupies the path).
    '''
    print("创建目录结构")
    target_dir_info = {}
    for split in ("train", "val"):
        for kind in ("images", "labels"):
            path = os.path.join(target_dir, split, kind)
            # makedirs also creates target_dir and the split folder, and
            # exist_ok avoids the check-then-create race of exists()+mkdir().
            os.makedirs(path, exist_ok=True)
            target_dir_info[split + "_" + kind] = path
    # The "test" folder is created but intentionally not part of the result.
    os.makedirs(os.path.join(target_dir, "test"), exist_ok=True)
    return target_dir_info


def split_samples(label_from, image_from, target_dir_info, rate=None, image_format=None):
    '''
    Move labelled samples from the temporary folders into the train/val tree.

    Resulting layout (folders must already exist):

        train
            images
            labels   (contains a copy of classes.txt)
        val
            images
            labels   (contains classes.txt)

    Every ``*.txt`` file in ``label_from`` except ``classes.txt`` is treated
    as a label file. A label whose image is missing from ``image_from`` is
    DELETED and excluded from the split. The remaining samples are processed
    in sorted path order: the first ``ceil(total * rate)`` go to the training
    folders, the rest to the validation folders. Files are moved, not copied.

    :param label_from: folder holding the label ``.txt`` files.
    :param image_from: folder holding the image files.
    :param target_dir_info: dict with the keys ``"train_images"``,
        ``"train_labels"``, ``"val_images"`` and ``"val_labels"``.
    :param rate: fraction of samples used for training; defaults to the
        module-level ``split_rate``.
    :param image_format: image extension including the dot; defaults to the
        module-level ``IMAGE_FORMAT``.
    '''
    if rate is None:
        rate = split_rate
    if image_format is None:
        image_format = IMAGE_FORMAT

    print("分割数据集train_val")
    # glob returns paths in an unspecified order; sort so that the same
    # input always produces the same train/val assignment.
    label_paths = sorted(glob.glob(os.path.join(label_from, "*.txt")))

    samples = []  # (label file name, label path, image path)
    for label_path in label_paths:
        name = os.path.basename(label_path)
        # Compare the whole file name: endswith() would also swallow label
        # files such as "my_classes.txt".
        if name == "classes.txt":
            shutil.copy(label_path, target_dir_info["train_labels"])
            shutil.move(label_path, target_dir_info["val_labels"])
            continue
        print(name)
        stem = os.path.splitext(name)[0]
        image_path = os.path.join(image_from, stem + image_format)
        if not os.path.exists(image_path):
            # Orphan label: remove it and keep it out of the split, otherwise
            # the move below would fail on the file that was just deleted.
            os.remove(label_path)
            print("文件不存在:", image_path)
            continue
        samples.append((name, label_path, image_path))

    total = len(samples)
    train_num = math.ceil(total * rate)
    valid_num = total - train_num
    print("总体样本数量total:", total)
    print("训练集样本数量train_num:", train_num)
    print("验证集样本数量valid_num:", valid_num)

    for i, (name, label_path, image_path) in enumerate(samples):
        split = "train" if i < train_num else "val"
        to_image_dir = target_dir_info[split + "_images"]
        to_label_dir = target_dir_info[split + "_labels"]
        print("to_image_dir", to_image_dir)
        print("to_label_dir", to_label_dir)
        print(name)
        # Move the label first, then its image. Destination names are built
        # from the file name only, so a ".txt" inside a folder name can no
        # longer be rewritten by accident.
        shutil.move(label_path, os.path.join(to_label_dir, name))
        shutil.move(image_path, os.path.join(to_image_dir, os.path.basename(image_path)))


def mk_Annotation():
    '''
    Generate placeholder annotation files for a simulated run.

    Does nothing when ``label_from`` already contains at least one ``.txt``
    file. Otherwise writes a ``classes.txt`` containing "1" and creates one
    empty ``.txt`` label for each of the first (at most) 10 images in
    ``image_from`` whose name ends with ``IMAGE_FORMAT``.
    '''
    print("模拟生成Annotation.txt文件")
    # Existing labels mean there is nothing to simulate.
    if glob.glob(os.path.join(label_from, "*.txt")):
        return

    classes_path = os.path.join(label_from, "classes.txt")
    with open(classes_path, "w") as handle:
        handle.write("1")

    image_pattern = os.path.join(image_from, "*" + IMAGE_FORMAT)
    for image_path in glob.glob(image_pattern)[:10]:
        print(image_path)  # e.g. C:\Users\999\Desktop\QQ_SafeVerify\sample\1672192768483.jpg
        # Drop the last four characters (the extension) from the file name.
        stem = os.path.basename(image_path)[:-4]
        print(stem)
        # Opening in write mode is enough to create the empty label file.
        with open(os.path.join(label_from, stem + ".txt"), "w"):
            pass


if __name__ == '__main__':
    # Step 1: create the directory structure under target_dir.
    # The returned dict maps 'train_images', 'train_labels', 'val_images'
    # and 'val_labels' to the corresponding folder paths, for example
    # 'train_images' -> '<target_dir>\\train\\images'.
    dirs = mk_directory(target_dir)
    print(dirs)

    # Step 2 (simulated environment): when label_from holds no .txt files,
    # generate up to 10 placeholder labels matching the images.
    mk_Annotation()

    # Step 3: distribute the samples over the train and val folders.
    split_samples(label_from, image_from, dirs)
    print("分割完成:", target_dir)