以西储大学(CWRU)轴承故障检测数据集为例
生成的txt文件打开后如图所示:每行前半部分是以根目录名开头的图片相对路径,末尾的数字是该图片的类别标签
代码如下:
import os
import random
# Root directory of the dataset (replace with the directory that holds the four class sub-folders).
base_dir = 'data/datalists/kfold/0.007'
# Names of the class sub-folders expected directly under base_dir.
subfolders = ['Ball0.007', 'IR0.007', 'Normal', 'OR0.007']
# Split ratios. Only the train and validation ratios enter the size computation below;
# the test split receives whatever is left over.
train_ratio = 0.7
valid_ratio = 0.2
test_ratio = 0.1
# Accumulators for the image paths of each split, filled across all sub-folders.
train_image_paths = []
valid_image_paths = []
test_image_paths = []

# Each sub-folder is split on its own, so every sub-folder is divided by the same ratios.
for folder in subfolders:
    folder_path = os.path.join(base_dir, folder)

    # Collect the '.jpg' entries found directly inside this sub-folder.
    image_files = []
    for filename in os.listdir(folder_path):
        if filename.endswith('.jpg'):
            image_files.append(os.path.join(folder_path, filename))

    # Shuffle in place so the assignment of images to splits is random.
    random.shuffle(image_files)

    # int() truncates, so any rounding remainder ends up in the test split.
    total_images = len(image_files)
    train_size = int(total_images * train_ratio)
    valid_size = int(total_images * valid_ratio)
    test_size = total_images - train_size - valid_size

    # Slice boundaries: [0, train_end) -> train, [train_end, valid_end) -> validation, rest -> test.
    train_end = train_size
    valid_end = train_size + valid_size
    train_image_paths += image_files[:train_end]
    valid_image_paths += image_files[train_end:valid_end]
    test_image_paths += image_files[valid_end:]
def save_image_paths_to_txt(file_path, image_paths, root_dir=None):
    """Write one ``<root name>/<relative path> <label>`` line per image.

    The label is the first digit character that appears in the image's file
    name. Images whose file name contains no digit are skipped and produce
    no line.

    Args:
        file_path: Destination text file; it is created or overwritten.
        image_paths: Iterable of image paths located under the root directory.
        root_dir: Dataset root used to compute the relative paths and the
            leading folder name. Defaults to the module-level ``base_dir``
            when omitted.
    """
    if root_dir is None:
        root_dir = base_dir

    # normpath strips a trailing separator; without it basename() would
    # return '' for a root such as 'data/0.007/'.
    parent_folder = os.path.basename(os.path.normpath(root_dir))

    # Explicit UTF-8 so non-ASCII paths are written the same way on every
    # platform instead of depending on the locale's default encoding.
    with open(file_path, 'w', encoding='utf-8') as file:
        for path in image_paths:
            filename = os.path.basename(path)
            first_digit = next((char for char in filename if char.isdigit()), None)
            if first_digit is None:
                # No digit in the file name means no label can be derived.
                continue
            # relpath uses the OS separator; normalise to '/' so the list
            # file has a single, consistent separator on Windows as well.
            relative_path = os.path.relpath(path, root_dir).replace(os.sep, '/')
            file.write(f"{parent_folder}/{relative_path} {first_digit}\n")
# Write each split to its own list file, in the order train, validation, test.
for list_file, split_paths in (
    ('0.007_train.txt', train_image_paths),
    ('0.007_val.txt', valid_image_paths),
    ('0.007_test.txt', test_image_paths),
):
    save_image_paths_to_txt(list_file, split_paths)
# Tell the user that all three list files have been written.
print("训练集、验证集和测试集的图片路径已保存到相应的txt文件中,末尾包含图片名称的第一个数字。")