划分数据的方法
1、将图片数据集按比例分割为训练集和验证集,并分别存放
首先,整体目录结构如下(原始图片按类别存放在 ./data/dataset/<类别名>/ 目录下):
以下是运行代码的结果(每个类别的图片按 8:2 的比例分别保存到 ./data/train/<类别名>/ 和 ./data/eval/<类别名>/):
分割后的图片文件名与原图片文件名保持一致
下面是代码(可能比较繁琐,并没有进行优化)
import os
import random
from PIL import Image
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
def _save_images(image_paths, target_dir):
    """Re-save each image in *image_paths* into *target_dir*, keeping its file name."""
    for src in image_paths:
        dst = os.path.join(target_dir, os.path.basename(src))
        # The context manager releases the source file handle once the
        # image has been written, instead of leaving it to the GC.
        with Image.open(src) as img:
            img.save(dst)


def division_train_eval(data_path, train_root='./data/train',
                        eval_root='./data/eval', train_ratio=0.8):
    """Split a per-class image dataset into training and evaluation sets.

    Every sub-directory of *data_path* is treated as one class. The files in
    it are shuffled, the first ``int(count * train_ratio)`` are written to
    ``<train_root>/<class>`` and the remainder to ``<eval_root>/<class>``.
    Images are re-saved through PIL under their original file names.

    Args:
        data_path: Directory that contains one sub-directory per class.
        train_root: Directory that receives the training split.
        eval_root: Directory that receives the evaluation split.
        train_ratio: Fraction (0..1) of each class assigned to training.

    Raises:
        ValueError: If *train_ratio* is outside the range [0, 1].
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError("train_ratio must be between 0 and 1")

    for class_name in os.listdir(data_path):
        class_dir = os.path.join(data_path, class_name)
        # Only directories are classes; skip loose files so that no stray
        # output folders are created for them.
        if not os.path.isdir(class_dir):
            continue

        train_dir = os.path.join(train_root, class_name)
        eval_dir = os.path.join(eval_root, class_name)
        os.makedirs(train_dir, exist_ok=True)
        os.makedirs(eval_dir, exist_ok=True)

        # os.path.join keeps the paths valid on every platform (the separator
        # is not hard-coded); nested directories cannot be opened as images,
        # so only regular files are kept.
        candidates = (os.path.join(class_dir, entry)
                      for entry in os.listdir(class_dir))
        data_list = [path for path in candidates if os.path.isfile(path)]
        random.shuffle(data_list)
        print("==================================")
        print(len(data_list))

        split_at = int(len(data_list) * train_ratio)
        train_list = data_list[:split_at]
        eval_list = data_list[split_at:]
        print(len(train_list))
        print(len(eval_list))

        _save_images(train_list, train_dir)
        _save_images(eval_list, eval_dir)
if __name__ == '__main__':
    # Script entry point: split the dataset stored under ./data/dataset.
    division_train_eval(data_path='./data/dataset')