# 划分数据集
import os
import shutil
image_path = '/home/gyp/codenew/QueryDet/detectron2-main/datasets/coco128-yolo/images/train2017'  # source image directory
txt_path = '/home/gyp/codenew/QueryDet/detectron2-main/datasets/coco128-yolo/labels/train2017'  # source label (.txt) directory
new_file_path = '/home/gyp/codenew/QueryDet/detectron2-main/datasets/coco128-split'  # output root of the split dataset
train_rate = 0.8  # fraction of labelled images assigned to the training set
val_rate = 0.2  # fraction of labelled images assigned to the validation set


def _scaled_count(ratio, total):
    """Return ``int(ratio * total)``, tolerant of float round-off.

    Sums such as ``0.7 + 0.3`` evaluate to ``0.9999999999999999``; a plain
    ``int()`` would then truncate one item away, so a tiny epsilon is added
    before truncating. The result is clamped to ``[0, total]``.
    """
    return max(0, min(total, int(ratio * total + 1e-9)))


def _copy_split(names, image_dir, label_dir, label_by_stem, image_out, label_out):
    """Copy each image in *names* and its label file into the output folders."""
    for name in names:
        shutil.copy(os.path.join(image_dir, name), image_out)
        label_name = label_by_stem[os.path.splitext(name)[0]]
        shutil.copy(os.path.join(label_dir, label_name), label_out)


def split_dataset(image_dir, label_dir, output_dir, train_ratio, val_ratio):
    """Split labelled images into train/val sets under *output_dir*.

    Only images whose base name matches a ``.txt`` file in *label_dir* are
    used. The result is laid out as::

        output_dir/images/train   output_dir/images/val
        output_dir/labels/train   output_dir/labels/val

    Args:
        image_dir: Directory containing the source images.
        label_dir: Directory containing the ``.txt`` label files.
        output_dir: Root directory of the split dataset (created if missing).
        train_ratio: Fraction of labelled images placed in the training set.
        val_ratio: Fraction of labelled images placed in the validation set.

    Returns:
        A ``(train_names, val_names)`` tuple of image file names.
    """
    # Map base name -> label file name; a dict gives O(1) membership tests.
    label_by_stem = {
        os.path.splitext(name)[0]: name
        for name in os.listdir(label_dir)
        if name.endswith('.txt')
    }

    # Keep the real file name (and extension) of every labelled image.
    # Sorting makes the split reproducible: os.listdir order is arbitrary.
    labelled = sorted(
        name
        for name in os.listdir(image_dir)
        if os.path.splitext(name)[0] in label_by_stem
        and os.path.isfile(os.path.join(image_dir, name))
    )

    total = len(labelled)
    train_end = _scaled_count(train_ratio, total)
    val_end = max(train_end, _scaled_count(train_ratio + val_ratio, total))
    train_names = labelled[:train_end]
    val_names = labelled[train_end:val_end]

    # Create every output folder up front so an empty split still yields a
    # complete directory layout.
    folders = {}
    for kind in ('images', 'labels'):
        for subset in ('train', 'val'):
            folder = os.path.join(output_dir, kind, subset)
            os.makedirs(folder, exist_ok=True)
            folders[kind, subset] = folder

    for name in train_names:
        print(name)
    _copy_split(train_names, image_dir, label_dir, label_by_stem,
                folders['images', 'train'], folders['labels', 'train'])
    _copy_split(val_names, image_dir, label_dir, label_by_stem,
                folders['images', 'val'], folders['labels', 'val'])

    return train_names, val_names


if __name__ == '__main__':
    split_dataset(image_path, txt_path, new_file_path, train_rate, val_rate)
# Dataset split
# First published 2023-10-31 20:41:38