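# Dataset split: typically 70/30 -- 70% of the data is used for training and
# the remaining 30% for testing.
# This script splits CASIA-WebFace into a training set and a test set and
# writes the resulting file lists to txt files.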
import os
# Root of the CASIA-WebFace dataset. Each sub-directory holds the images of one
# identity and is named by its numeric label; train.txt and val.txt are written
# directly into this root.
path = '/home/xl/workstation/Datasets/CASIA-WebFace'


def split_files(files, train_ratio=0.7):
    """Split one identity's file names into ``(train, val)`` lists.

    Names ending in ``.txt`` are discarded, the rest are sorted, and the first
    ``round(count * train_ratio)`` names go to the training list while the
    remainder go to the validation list.

    Args:
        files: Iterable of file names (not full paths).
        train_ratio: Fraction of the kept files assigned to training.

    Returns:
        A ``(train_names, val_names)`` tuple of sorted lists.
    """
    # splitext (not split) is what isolates the extension; filtering before
    # counting keeps skipped list files from skewing the ratio.
    images = sorted(f for f in files if os.path.splitext(f)[1] != '.txt')
    train_count = int(round(len(images) * train_ratio))
    return images[:train_count], images[train_count:]


def split_dataset(root, train_ratio=0.7):
    """Write ``train.txt`` and ``val.txt`` listing the images under *root*.

    Every sub-directory of *root* is treated as one identity whose directory
    name is its integer label. Each output line has the form
    ``<root>/<dir>/<file> <label>``, where the label is ``int(dir)`` (so
    leading zeros are dropped).

    Both output files are rewritten from scratch on every call, so running
    the script again does not duplicate entries.

    Args:
        root: Dataset root directory; the two list files are created in it.
        train_ratio: Fraction of each identity's images used for training.

    Raises:
        ValueError: If a sub-directory name is not an integer.
        OSError: If *root* or one of its sub-directories cannot be read, or
            the output files cannot be written.
    """
    # Only directories are identities; this skips train.txt / val.txt left in
    # the root by an earlier run. Sorting makes the output order reproducible.
    labels = sorted(
        entry for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry))
    )
    train_path = os.path.join(root, 'train.txt')
    val_path = os.path.join(root, 'val.txt')
    with open(train_path, 'w') as train, open(val_path, 'w') as val:
        for label in labels:
            folder = os.path.join(root, label)
            train_names, val_names = split_files(os.listdir(folder), train_ratio)
            for out, names in ((train, train_names), (val, val_names)):
                out.writelines(
                    '{} {}\n'.format(os.path.join(folder, name), int(label))
                    for name in names
                )
            # Progress indicator: one line per finished identity.
            print(label)


if __name__ == '__main__':
    split_dataset(path)