# Split a classification dataset into train/val subsets (分割分类数据集)
# -*-coding: utf-8 -*-
import os
import random
import shutil
# Dataset location and split configuration.
parent_folder = "./"
class_names = ("angry", "happy", "relaxed", "sad")
# Fraction of each class's files that goes to the validation set.
val_ratio = 0.1


def split_class_folder(class_folder, train_folder, val_folder, ratio):
    """Copy the files of one class folder into train/ and val/ subfolders.

    The files directly inside ``class_folder`` are shuffled; the first
    ``int(ratio * file_count)`` of them are copied to
    ``val_folder/<class>/`` and the rest to ``train_folder/<class>/``.
    Source files are copied, not moved, so the class folder is left intact.

    Args:
        class_folder: Path of the folder holding one class's files.
        train_folder: Root folder of the training split.
        val_folder: Root folder of the validation split.
        ratio: Fraction of the files to place in the validation split.

    Returns:
        A ``(train_count, val_count)`` tuple of the number of files copied.

    Raises:
        FileNotFoundError: If ``class_folder`` does not exist.
    """
    # normpath drops a trailing separator so basename is never empty.
    class_name = os.path.basename(os.path.normpath(class_folder))

    # Only regular files are split; shutil.copy cannot copy a directory.
    # Sorting first makes the shuffle reproducible under a fixed random seed,
    # independent of the order in which the OS lists the directory.
    filenames = sorted(
        name
        for name in os.listdir(class_folder)
        if os.path.isfile(os.path.join(class_folder, name))
    )
    random.shuffle(filenames)

    # The validation size is computed per class, from this folder's own files.
    val_size = int(ratio * len(filenames))
    splits = (
        (val_folder, filenames[:val_size]),
        (train_folder, filenames[val_size:]),
    )
    for split_root, names in splits:
        # Create the destination once per class, in the split being written.
        dst_dir = os.path.join(split_root, class_name)
        os.makedirs(dst_dir, exist_ok=True)
        for name in names:
            shutil.copy(
                os.path.join(class_folder, name),
                os.path.join(dst_dir, name),
            )
    return len(filenames) - val_size, val_size


def split_dataset(root=parent_folder, classes=class_names, ratio=val_ratio):
    """Split every class folder under ``root`` into train/ and val/ sets.

    Args:
        root: Folder containing one sub-folder per class; the ``train`` and
            ``val`` output folders are created inside it.
        classes: Names of the class sub-folders to split.
        ratio: Fraction of each class's files to place in the validation set.

    Returns:
        A dict mapping each class name to its ``(train_count, val_count)``.

    Raises:
        FileNotFoundError: If one of the class folders does not exist.
    """
    train_folder = os.path.join(root, "train")
    val_folder = os.path.join(root, "val")
    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(val_folder, exist_ok=True)

    counts = {}
    for class_name in classes:
        counts[class_name] = split_class_folder(
            os.path.join(root, class_name), train_folder, val_folder, ratio
        )
    return counts


if __name__ == "__main__":
    for name, (train_count, val_count) in split_dataset().items():
        print("{}: {} train / {} val".format(name, train_count, val_count))