先设置数据集划分比例(例如 8:1:1,具体数值见代码),然后填入文件夹 dataSet 的路径。dataSet 下包含 images 和 labels 两个文件夹:images 中存放所有图片,labels 中存放与图片同名的 txt 标签文件。
运行结果如图所示:
运行后会生成 3 个记录图片相对路径的文本文件:trainSet.txt 如下所示,valSet.txt 和 testSet.txt 与之类似。
trainImages文件夹是训练集对应的图片(jpg文件),trainLabels文件夹是训练集对应的标签(txt文件),其他类似。YOLOv5、YOLOv6、YOLOv7、YOLOv8和YOLOv9都可通过此代码生成所需的数据集格式。
import os
import random
import shutil
def main(root_path=None, train_val_fraction=None, train_fraction=None):
    """Split an images/labels dataset into train, val and test subsets.

    The dataset root must contain an ``images`` folder and a ``labels``
    folder holding one ``<image stem>.txt`` file per image.  The function
    creates ``trainImages``/``trainLabels``, ``valImages``/``valLabels`` and
    ``testImages``/``testLabels`` inside the root, copies every image and its
    label into the folder pair of the subset it was drawn into, and writes
    ``trainSet.txt``, ``valSet.txt`` and ``testSet.txt`` listing the images of
    each subset as ``<root folder name>/images/<file name>``.

    Args:
        root_path: Dataset root.  Defaults to the module-level ``rootPath``.
        train_val_fraction: Share of all images that go to train + val; the
            rest becomes the test subset.  Defaults to the module-level
            ``percentageOfTrainAndVal``.
        train_fraction: Share of the train + val images that go to train.
            Defaults to the module-level ``percentageOfTrain``.

    Raises:
        FileNotFoundError: If ``images`` is missing or an image has no label
            file.  Raised before anything is written.
        FileExistsError: If one of the six output folders already exists.
    """
    # Fall back to the module-level settings so that the original
    # no-argument call ``main()`` keeps working unchanged.
    if root_path is None:
        root_path = rootPath
    if train_val_fraction is None:
        train_val_fraction = percentageOfTrainAndVal
    if train_fraction is None:
        train_fraction = percentageOfTrain

    images_dir = os.path.join(root_path, 'images')
    labels_dir = os.path.join(root_path, 'labels')

    # Sorted so that a seeded run is reproducible (os.listdir order is
    # arbitrary); sub-directories are skipped because they cannot be copied
    # as pictures.
    image_names = sorted(
        entry for entry in os.listdir(images_dir)
        if os.path.isfile(os.path.join(images_dir, entry))
    )
    label_names = [os.path.splitext(name)[0] + '.txt' for name in image_names]

    # Validate all inputs before creating any output, so a bad dataset does
    # not leave a half-built split behind.
    missing_labels = [
        label for label in label_names
        if not os.path.isfile(os.path.join(labels_dir, label))
    ]
    if missing_labels:
        raise FileNotFoundError(
            '%d label file(s) missing in %s, e.g. %s'
            % (len(missing_labels), labels_dir, ', '.join(missing_labels[:5]))
        )

    image_count = len(image_names)
    train_val_count = int(image_count * train_val_fraction)
    train_count = int(train_val_count * train_fraction)

    # Draw train + val first, then train out of that pool; whatever is left
    # of the pool is val, and everything outside the pool is test.
    train_val_pool = random.sample(range(image_count), train_val_count)
    train_indices = set(random.sample(train_val_pool, train_count))
    train_val_indices = set(train_val_pool)  # sets give O(1) membership tests

    # No exist_ok here on purpose: reusing folders from an earlier split
    # could leave stale files that leak between the subsets.
    for subset in ('train', 'val', 'test'):
        os.makedirs(os.path.join(root_path, subset + 'Images'))
        os.makedirs(os.path.join(root_path, subset + 'Labels'))

    # normpath drops a trailing separator, which would otherwise make
    # basename() return an empty string.
    dataset_name = os.path.basename(os.path.normpath(root_path))

    with open(os.path.join(root_path, 'trainSet.txt'), 'w') as train_list, \
            open(os.path.join(root_path, 'valSet.txt'), 'w') as val_list, \
            open(os.path.join(root_path, 'testSet.txt'), 'w') as test_list:
        for index, (image_name, label_name) in enumerate(
                zip(image_names, label_names)):
            if index in train_indices:
                subset, listing = 'train', train_list
            elif index in train_val_indices:
                subset, listing = 'val', val_list
            else:
                subset, listing = 'test', test_list

            listing.write(dataset_name + '/images/' + image_name + '\n')
            shutil.copy2(
                os.path.join(images_dir, image_name),
                os.path.join(root_path, subset + 'Images', image_name),
            )
            shutil.copy2(
                os.path.join(labels_dir, label_name),
                os.path.join(root_path, subset + 'Labels', label_name),
            )
if __name__ == '__main__':
    # Dataset root; main() reads its `images` and `labels` sub-folders.
    rootPath = r"C:\Users\15231\.conda\envs\yolo\Lib\site-packages\ultralytics\models\yolo\detect\dataSet"

    # Share of the train subset within train + val.
    percentageOfTrain = 0.8888
    # Share of train + val within the whole dataset.
    percentageOfTrainAndVal = 0.9001
    # With these two values 7581 images split into 6064 train, 759 val and
    # 758 test images, i.e. roughly 8:1:1.

    main()