转载:同济子豪兄Label2Everything/labelme2mask/【C】划分训练-测试集.ipynb at main · TommyZihao/Label2Everything · GitHub
导入工具包
import os
import shutil
import random
from tqdm import tqdm
指定数据集路径
# Root folder of the dataset. A later cell chdir()s into it, and the split
# cells expect `img_dir/` (images) and `ann_dir/` (annotations) inside it.
Dataset_Path = 'Watermelon87_Semantic_Seg_Labelme'
查看数据集目录结构
import seedir as sd

# Print the dataset folder as an emoji-styled tree, limited to depth 1.
sd.seedir(
    Dataset_Path,
    depthlimit=1,
    style='emoji',
)
📁 Watermelon87_Semantic_Seg_Labelme/
├─📁 img_dir/
└─📁 ann_dir/
创建文件夹
# Work from inside the dataset root so every later path is relative to it.
os.chdir(Dataset_Path)

# Staging folders for the two splits; os.mkdir raises FileExistsError if one
# of them is already present.
for split_dir in ('train', 'val'):
    os.mkdir(split_dir)
删除系统自动生成的多余文件
查看待删除的多余文件
# List (without deleting) OS/Jupyter clutter anywhere below the current
# directory; -iname matches the name case-insensitively.
!find . -iname '__MACOSX'
!find . -iname '.DS_Store'
!find . -iname '.ipynb_checkpoints'
删除多余文件
!for i in `find . -iname '__MACOSX'`; do rm -rf $i;done
!for i in `find . -iname '.DS_Store'`; do rm -rf $i;done
!for i in `find . -iname '.ipynb_checkpoints'`; do rm -rf $i;done
验证多余文件已删除
# Re-run the same searches; no output means nothing matching is left.
!find . -iname '__MACOSX'
!find . -iname '.DS_Store'
!find . -iname '.ipynb_checkpoints'
在图像文件夹中,划分训练集和测试集
test_frac = 0.2  # fraction of the files held out for the val split
random.seed(123)  # fixed seed so the shuffle below is repeatable

folder = 'img_dir'
# os.listdir() returns entries in arbitrary, filesystem-dependent order. Sort
# before shuffling, otherwise the same seed can produce a different split on
# another machine or after the directory has been rewritten.
img_paths = sorted(os.listdir(folder))
random.shuffle(img_paths)

# int() truncates, so the held-out count is rounded down.
val_number = int(len(img_paths) * test_frac)
train_files = img_paths[val_number:]  # file names that go to the train split
val_files = img_paths[:val_number]  # file names that go to the val split

print('数据集文件总数', len(img_paths))
print('训练集文件个数', len(train_files))
print('测试集文件个数', len(val_files))
将训练集图像移动至 `train` 目录
# Move every image selected for training out of `folder` into the staging
# 'train' directory, keeping its file name.
for each in tqdm(train_files):
    src_path = os.path.join(folder, each)
    dst_path = os.path.join('train', each)
    shutil.move(src_path, dst_path)
将测试集图像移动至 `val` 目录
# Move every held-out image out of `folder` into the staging 'val' directory,
# keeping its file name.
for each in tqdm(val_files):
    src_path = os.path.join(folder, each)
    dst_path = os.path.join('val', each)
    shutil.move(src_path, dst_path)
将 `train` 和 `val` 剪切至 `img_dir`
# Relocate both staging folders underneath img_dir.
for staged, target in (('train', 'img_dir/train'), ('val', 'img_dir/val')):
    shutil.move(staged, target)
在标注文件夹中,划分训练集和测试集
# From here on the source folder is the annotation directory.
folder = 'ann_dir'

# Fresh staging folders for the annotation split; os.mkdir raises
# FileExistsError if one of them is already present.
for split_dir in ('train', 'val'):
    os.mkdir(split_dir)
将训练集标注移动至 `train` 目录
# Move the annotation belonging to each training image into the staging
# 'train' directory. The annotation shares the image's stem with a .png suffix.
for each in tqdm(train_files):
    # splitext strips only the final extension, so a stem that itself contains
    # dots (e.g. 'a.b.jpg' -> 'a.b') is kept whole; split('.')[0] would cut it
    # down to 'a' and look for the wrong file.
    mask_name = os.path.splitext(each)[0] + '.png'
    src_path = os.path.join(folder, mask_name)
    dst_path = os.path.join('train', mask_name)
    shutil.move(src_path, dst_path)
将测试集标注移动至 `val` 目录
# Move the annotation belonging to each held-out image into the staging 'val'
# directory. The annotation shares the image's stem with a .png suffix.
for each in tqdm(val_files):
    # splitext strips only the final extension, so a stem that itself contains
    # dots (e.g. 'a.b.jpg' -> 'a.b') is kept whole; split('.')[0] would cut it
    # down to 'a' and look for the wrong file.
    mask_name = os.path.splitext(each)[0] + '.png'
    src_path = os.path.join(folder, mask_name)
    dst_path = os.path.join('val', mask_name)
    shutil.move(src_path, dst_path)
将 `train` 和 `val` 剪切至 `ann_dir`
# Relocate both staging folders underneath ann_dir.
for staged, target in (('train', 'ann_dir/train'), ('val', 'ann_dir/val')):
    shutil.move(staged, target)
删除系统自动生成的多余文件
# Step up one directory level. NOTE(review): this returns to the starting
# directory only if the earlier os.chdir(Dataset_Path) went down exactly one
# level - confirm if Dataset_Path is ever changed to a nested path.
os.chdir('../')
查看待删除的多余文件
# List (without deleting) OS/Jupyter clutter anywhere below the current
# directory; -iname matches the name case-insensitively.
!find . -iname '__MACOSX'
!find . -iname '.DS_Store'
!find . -iname '.ipynb_checkpoints'
删除多余文件
!for i in `find . -iname '__MACOSX'`; do rm -rf $i;done
!for i in `find . -iname '.DS_Store'`; do rm -rf $i;done
!for i in `find . -iname '.ipynb_checkpoints'`; do rm -rf $i;done
验证多余文件已删除
# Re-run the same searches; no output means nothing matching is left.
!find . -iname '__MACOSX'
!find . -iname '.DS_Store'
!find . -iname '.ipynb_checkpoints'