import os
import shutil
import random
def ensure_dir(directory):
    """Create ``directory`` (and any missing parents) if it does not exist.

    Args:
        directory: Path of the directory to create.

    Raises:
        OSError: If the directory cannot be created, e.g. the path already
            exists as a regular file (``FileExistsError``) or permissions
            are insufficient.
    """
    # exist_ok=True makes the call idempotent and avoids the race between a
    # separate existence check and the creation itself.
    os.makedirs(directory, exist_ok=True)
def move_files(source_pairs, dest_dir, category):
    """Move image/label file pairs into ``dest_dir`` and print each result.

    Images are moved to ``<dest_dir>/images`` and labels to
    ``<dest_dir>/labels``; both directories are created on demand. Every
    pair is processed independently: a failure is printed and the loop
    continues with the next pair. If the image was already moved when the
    label move fails, the image is moved back so that a pair is never left
    split between source and destination.

    Args:
        source_pairs: Iterable of ``(image_path, label_path)`` tuples.
        dest_dir: Root directory of the target split.
        category: Split name; used only in the printed messages.
    """
    # These two paths do not depend on the pair, so compute them once.
    image_dest_dir = os.path.join(dest_dir, "images")
    label_dest_dir = os.path.join(dest_dir, "labels")

    for image_path, label_path in source_pairs:
        image_target = os.path.join(image_dest_dir, os.path.basename(image_path))
        label_target = os.path.join(label_dest_dir, os.path.basename(label_path))
        image_moved = False
        try:
            os.makedirs(image_dest_dir, exist_ok=True)
            os.makedirs(label_dest_dir, exist_ok=True)
            # Move the image first, then its label.
            shutil.move(image_path, image_target)
            image_moved = True
            shutil.move(label_path, label_target)
        except OSError as e:  # shutil.Error is a subclass of OSError
            if image_moved:
                # The label could not follow its image: undo the image move
                # so the dataset does not end up with an orphaned image.
                try:
                    shutil.move(image_target, image_path)
                except OSError as rollback_error:
                    print(f"Could not move {image_target} back to {image_path}: {rollback_error}")
            print(f"Error moving {image_path} and {label_path} to {category}: {e}")
        else:
            print(f"Moved to {category}: {image_path} and {label_path}")
# Source folders holding the images and their label files.
source_image_dir = "D:\\hzx_study\\gispro_\\test-4-标注对象-1\\1\\images"
source_label_dir = "D:\\hzx_study\\gispro_\\test-4-标注对象-1\\1\\labels"
# Root folder that receives the split dataset.
dataset_split_dir = "D:\\hzx_study\\gispro_\\test-4-标注对象-1\\分类了"
# One sub-folder per split.
test_dir = os.path.join(dataset_split_dir, "test")
train_dir = os.path.join(dataset_split_dir, "train")
valid_dir = os.path.join(dataset_split_dir, "valid")
# (image_path, label_path) tuples for every image that has a label file.
valid_image_label_pairs = []
# Scan every entry of the source image folder.
for image_name in os.listdir(source_image_dir):
    image_path = os.path.join(source_image_dir, image_name)
    # os.listdir() also returns sub-directories; they have no label and
    # os.remove() would raise on them, aborting the whole scan. Skip them.
    if not os.path.isfile(image_path):
        continue
    # The label is expected to share the image's base name, with an .xml extension.
    label_name = os.path.splitext(image_name)[0] + ".xml"
    label_path = os.path.join(source_label_dir, label_name)
    # Keep the pair only when the label file exists.
    if os.path.exists(label_path):
        valid_image_label_pairs.append((image_path, label_path))
    else:
        # NOTE: destructive - files without a matching label are deleted.
        print(f"No corresponding label file for {image_path}. Deleting image.")
        os.remove(image_path)
# Randomly assign the pairs to the train, test and valid sets.
random.shuffle(valid_image_label_pairs)
# Number of pairs in one 10% share (integer division). With fewer than 10
# pairs this is 0, so test and valid stay empty and everything goes to train.
split_ratio = len(valid_image_label_pairs) // 10
test_pairs = valid_image_label_pairs[:split_ratio]
valid_pairs = valid_image_label_pairs[split_ratio:2 * split_ratio]
train_pairs = valid_image_label_pairs[2 * split_ratio:]
# Move the files into their split folders.
move_files(train_pairs, train_dir, "train")
move_files(test_pairs, test_dir, "test")
move_files(valid_pairs, valid_dir, "valid")
print("Files have been categorized into train, test, and valid sets.")