import numpy as np
import os
import shutil
##### Dataset split: copy paired image/label files into train/val folders
path = "D:/Data/"  # dataset root; must contain the image and label folders
IMAGE_DIR = "image"  # change to the name of your dataset's image folder
LABEL_DIR = "label"  # change to the name of your dataset's label folder
TRAIN_RATIO = 0.8  # share of the shuffled samples used for training


def _list_sorted_files(folder):
    """Return the full paths of the regular files in *folder*, sorted."""
    entries = (os.path.join(folder, name) for name in os.listdir(folder))
    # Sub-directories are skipped: shutil.copy cannot copy a directory.
    return sorted(entry for entry in entries if os.path.isfile(entry))


def split_dataset(root, image_dir=IMAGE_DIR, label_dir=LABEL_DIR,
                  out_dir="training", train_ratio=TRAIN_RATIO, seed=None):
    """Randomly split paired image/label files into train and val folders.

    Files in ``root/image_dir`` and ``root/label_dir`` are each sorted by
    path and paired by position, so the i-th image is assumed to belong to
    the i-th label.  One random permutation of the sample indices is drawn
    and applied to both lists, which keeps every image in the same subset
    as its label.  The files are copied (not moved) into
    ``root/out_dir/{train,val,train_label,val_label}``.

    Args:
        root: Dataset root directory.
        image_dir: Name of the image folder inside *root*.
        label_dir: Name of the label folder inside *root*.
        out_dir: Name of the output folder created inside *root*.
        train_ratio: Fraction of samples copied to the training set; the
            remainder goes to the validation set.
        seed: Optional seed for a reproducible split; ``None`` draws a
            fresh random split on every call.

    Returns:
        A ``(train_count, val_count)`` tuple.

    Raises:
        ValueError: If *train_ratio* is outside ``[0, 1]`` or the image and
            label folders hold a different number of files.
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(
            "train_ratio must be between 0 and 1, got %r" % (train_ratio,)
        )

    images = _list_sorted_files(os.path.join(root, image_dir))
    labels = _list_sorted_files(os.path.join(root, label_dir))
    if len(images) != len(labels):
        # Positional pairing would silently mismatch images and labels.
        raise ValueError(
            "image/label count mismatch: %d images vs %d labels"
            % (len(images), len(labels))
        )

    # Derive the sizes from the data instead of hard-coding the sample count.
    total = len(images)
    train_count = int(round(total * train_ratio))
    order = np.random.default_rng(seed).permutation(total)
    train_idx, val_idx = order[:train_count], order[train_count:]

    out_root = os.path.join(root, out_dir)
    plan = (
        ("train", images, train_idx),        # training images
        ("val", images, val_idx),            # validation images
        ("train_label", labels, train_idx),  # training labels
        ("val_label", labels, val_idx),      # validation labels
    )
    for sub_dir, files, indices in plan:
        dest = os.path.join(out_root, sub_dir)
        # makedirs also creates the missing parent folder and tolerates an
        # existing one.  Without an existing destination directory,
        # shutil.copy would write every file onto one file named `dest`.
        os.makedirs(dest, exist_ok=True)
        for index in indices:
            shutil.copy(files[index], dest)

    return train_count, total - train_count


if __name__ == "__main__":
    split_dataset(path)
    print("数据集分配完成!")
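# Deep learning: splitting a dataset into training and validation sets
# (Web page residue: "latest recommended article published 2024-06-13 09:30:00")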