import os
from pathlib import Path
import shutil
import random
"""
复制指定扩展名的文件从源目录到目标目录(包括子目录)。
:param src_dir: 源目录路径
:param txt_dir: 目标标签目录路径
:param img_dir: 目标图片目录路径
:param file_extension: 要复制的文件扩展名,默认为'.txt'
"""
def copy_specific_files(src_dir, txt_dir, img_dir, file_extension='.txt'):
endswith = ['.xbm', '.tif', 'pjp', '.svgz', 'jpg', 'jpeg', 'ico', 'tiff', '.gif', 'svg', '.jfif', '.webp', '.png',
'.bmp', 'pjpeg', '.avif']
endswith_tuple = tuple(endswith)
if not os.path.exists(txt_dir):
os.makedirs(txt_dir) # 如果目标目录不存在,则创建它
if not os.path.exists(img_dir):
os.makedirs(img_dir) # 如果目标目录不存在,则创建它
for root, dirs, files in os.walk(src_dir):
for file in files:
if file.endswith(file_extension): # 检查文件扩展名
if file == "classes.txt":
break
src_file = os.path.join(root, file) # 构建源文件完整路径
dst_file = os.path.join(txt_dir, file) # 构建目标文件完整路径
shutil.copy2(src_file, dst_file) # 复制文件
if file.endswith(endswith_tuple):
src_file = os.path.join(root, file) # 构建源文件完整路径
dst_file = os.path.join(img_dir, file) # 构建目标文件完整路径
shutil.copy2(src_file, dst_file) # 复制文件
"""
复制指定扩展名的文件从源目录到目标目录(包括子目录)。
:param file_path: 源图片目录路径
:param file_path_label: 源标签路径
:param new_file_path: 目标图片路径
:param new_file_path_label: 目标标签路径
"""
def train_test_split(file_path, file_path_label, new_file_path, new_file_path_label):
# 划分数据比例6:2:2
split_rate = [0.6, 0.2, 0.2]
class_names = os.listdir(file_path) # 数据集文件和对应标签的名字是一样的
class_names_label = os.listdir(file_path_label)
# 目标文件夹下创建文件夹
split_names = ['train', 'val', 'test']
split_names_label = ['train', 'val', 'test']
print(class_names) # ['00000.jpg', '00001.jpg', '00002.jpg'... ]
print(class_names_label)
# 判断是否存在目标文件夹,不存在则创建---->创建train\val\test文件夹
if os.path.isdir(new_file_path):
pass
else:
os.makedirs(new_file_path)
if os.path.isdir(new_file_path_label):
pass
else:
os.makedirs(new_file_path_label)
for split_name in split_names:
split_path = new_file_path + "/" + split_name
split_path_label = new_file_path_label + "/" + split_name
print(split_path) # D:/Code/Data/GREENTdata/train, val, test
print(split_path_label)
if os.path.isdir(split_path):
pass
else:
os.makedirs(split_path)
if os.path.isdir(split_path_label):
pass
else:
os.makedirs(split_path_label)
# 按照比例划分数据集,并进行数据图片的复制
for class_name in class_names:
current_data_path = file_path # D:/Code/Data/centerlinedata/tem_voc/JPEGImages/
current_data_path_label = file_path_label
current_all_data = os.listdir(current_data_path)
current_all_data_label = os.listdir(current_data_path_label)
# os.listdir() 方法用于返回指定的文件夹包含的文件或文件夹的名字的列表。这个列表以字母顺序排列。
current_data_length = len(current_all_data) # 文件夹下的图片个数
current_data_length_label = len(current_all_data_label)
current_data_index_list = list(range(current_data_length))
current_data_index_list_label = list(range(current_data_length_label))
# 函数list()将range()的结果直接转换为列表,是一个数字列表。
random.shuffle(current_data_index_list) # 将列表顺序打乱
random.shuffle(current_data_index_list_label)
train_path = os.path.join(new_file_path, 'train/') # D:/Code/Data/GREENTdata/train/
train_path_label = os.path.join(new_file_path_label, 'train/')
val_path = os.path.join(new_file_path, 'val/') # D:/Code/Data/GREENTdata/val/
val_path_label = os.path.join(new_file_path_label, 'val/')
test_path = os.path.join(new_file_path, 'test/') # D:/Code/Data/GREENTdata/test/
test_path_label = os.path.join(new_file_path_label, 'test/')
train_stop_flag = current_data_length * split_rate[0]
train_stop_flag_label = current_data_length_label * split_rate[0]
val_stop_flag = current_data_length * (split_rate[0] + split_rate[1])
val_stop_flag_label = current_data_length_label * (split_rate[0] + split_rate[1])
current_idx = 0
train_num = 0
val_num = 0
test_num = 0
# 图片复制到文件夹中
for i in current_data_index_list:
src_img_path = os.path.join(current_data_path, current_all_data[i])
src_img_path_label = os.path.join(current_data_path_label, current_all_data_label[i])
if current_idx < train_stop_flag:
shutil.copy2(src_img_path, train_path)
shutil.copy2(src_img_path_label, train_path_label)
train_num += 1
elif (current_idx > train_stop_flag) and (current_idx <= val_stop_flag):
shutil.copy2(src_img_path, val_path)
shutil.copy2(src_img_path_label, val_path_label)
val_num += 1
else:
shutil.copy2(src_img_path, test_path)
shutil.copy2(src_img_path_label, test_path_label)
test_num += 1
current_idx += 1
print("Done!", train_num, val_num, test_num)
if __name__ == '__main__':
    # One-off maintenance run: walk the dataset tree and shorten file names by
    # replacing a fixed substring with "1".
    rpath = Path("D:/work/yolov5/datasets/2024/")
    for root, dirs, files in os.walk(rpath):
        for file in files:
            new_file = file.replace("龙华五岔路口五个方向", "1")
            if new_file == file:
                # Name does not contain the substring; moving a file onto
                # itself would be pointless.
                continue
            src_file = os.path.join(root, file)
            dst_file = os.path.join(root, new_file)
            if os.path.exists(dst_file):
                # shutil.move may overwrite an existing file (or fail,
                # depending on the platform); never clobber data silently.
                print(src_file + '---' + new_file + ' (skipped: target already exists)')
                continue
            print(src_file + '---' + new_file)
            shutil.move(src_file, dst_file)
# if __name__ == '__main__':
# # 源文件夹路径
# file_path = r"D:\work\yolov5\my_data\images"
# file_path_label = r"D:\work\yolov5\my_data\labels"
# # 新文件路径
# new_file_path = r"D:\work\yolov5\my_data\2024\img"
# new_file_path_label = r"D:\work\yolov5\my_data\2024\label"
# train_test_split(file_path, file_path_label, new_file_path, new_file_path_label)
# Copies the team's annotated data into the corresponding directories according to the split ratio.