1:循环遍历读取文件夹下所有图片
# Print the name of every entry found directly inside img_dir
# (the path is taken relative to the current working directory).
image_folder = r"./" + img_dir
for entry in os.listdir(image_folder):
    print(entry)
2:从文件(name.txt)读取图片名。这是一个label文件,每行用tab分隔,第一列是图片名
# Collect the first tab-separated field of every line in the label file.
# The result is deliberately not bound to the name `str`: rebinding it would
# hide the built-in type from every statement that runs afterwards.
with open(label_path, 'r', encoding='utf-8') as fp1:
    lines = fp1.read().strip().split('\n')
img_names = [line.split("\t")[0] for line in lines]
print(img_names)
3:分割数据集(一般框架里面有自带的分割文件)
import math
import os
import random
import shutil
import cv2
import argparse
def _str_to_bool(value):
    """Convert a command-line token into a real boolean.

    ``type=bool`` cannot be used with argparse because ``bool("False")`` is
    ``True``: every non-empty string would switch the option on.

    Args:
        value: The raw token from the command line (or an actual bool).

    Returns:
        The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised spelling.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string stays falsy, as it was with the former ``type=bool``.
    if token in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (value,))


def build_parser():
    """Build the command-line parser of the dataset splitter.

    Returns:
        An ``argparse.ArgumentParser`` exposing ``--img_dir``, ``--label_path``,
        ``--out_dir``, ``--lite_rate`` and ``--isMix``.
    """
    parser = argparse.ArgumentParser("数据集分割")
    # Path options are kept as plain strings, which is argparse's default.
    parser.add_argument('--img_dir', default="", help="图片文件夹", required=False)
    parser.add_argument('--label_path', default="", help="标签位置", required=False)
    parser.add_argument('--out_dir', default="", help='输出文件夹', required=False)
    parser.add_argument('--lite_rate', type=float, default=0.8, help='比例', required=False)
    parser.add_argument('--isMix', type=_str_to_bool, default=False, help='是否随机分配',
                        required=False)
    return parser


def _split_file_names(file_names, lite_rate, isMix):
    """Return ``(train_names, test_names)`` for the names ending in 'jpg'."""
    # Sorting first makes the split reproducible: os.listdir order is arbitrary.
    names = sorted(name for name in file_names if name.endswith('jpg'))
    if isMix:
        random.shuffle(names)
    # Clamp so that rates outside [0, 1] put everything on one side instead of
    # turning into a negative slice index.
    cut = min(max(math.floor(lite_rate * len(names)), 0), len(names))
    return names[:cut], names[cut:]


def _copy_images(img_dir, sub_path, file_names):
    """Copy ``file_names`` from ``img_dir`` into ``sub_path``, creating it first."""
    # shutil.copy does not create missing destination directories.
    os.makedirs(sub_path, exist_ok=True)
    for name in file_names:
        try:
            shutil.copy(os.path.join(img_dir, name), os.path.join(sub_path, name))
        except shutil.SameFileError:
            # Source and destination are the same file: nothing to copy.
            continue


def _write_label_files(label_path, out_dir, filename_train, filename_test):
    """Write the label lines of each subset to train/train.txt and test/test.txt."""
    # Sets give constant-time membership tests for every label line.
    train_names = set(filename_train)
    test_names = set(filename_test)
    train_label = os.path.join(out_dir, 'train', 'train.txt')
    test_label = os.path.join(out_dir, 'test', 'test.txt')
    for label_file in (train_label, test_label):
        os.makedirs(os.path.dirname(label_file), exist_ok=True)
    with open(label_path, 'r', encoding='utf-8') as fp1:
        lines = fp1.read().strip().split('\n')
    # Both outputs are opened in one `with` so they are flushed and closed even
    # if writing fails part-way through.
    with open(train_label, 'w', encoding='utf-8') as train_file, \
            open(test_label, 'w', encoding='utf-8') as test_file:
        for line in lines:
            # The image name is the first tab-separated field of the line.
            img_name = line.split("\t")[0]
            if img_name in train_names:
                train_file.write(line + '\n')
            if img_name in test_names:
                test_file.write(line + '\n')


def split_dataset(img_dir, label_path, out_dir, lite_rate=0.8, isMix=False):
    """Split the jpg images of ``img_dir`` into train and test subsets.

    The images are copied to ``out_dir/train`` and ``out_dir/test`` and the
    matching lines of the label file are written to ``train/train.txt`` and
    ``test/test.txt``.

    Args:
        img_dir: Directory that holds the source images.
        label_path: UTF-8 label file whose lines start with an image name
            followed by a tab.
        out_dir: Destination directory; created when missing.
        lite_rate: Fraction of the images that goes to the training subset.
        isMix: Shuffle the images before splitting when true.

    Returns:
        A ``(filename_train, filename_test)`` tuple of file-name lists.

    Raises:
        OSError: If ``img_dir`` or ``label_path`` cannot be read, or the
            output cannot be written.
    """
    filename_train, filename_test = _split_file_names(os.listdir(img_dir), lite_rate, isMix)
    _copy_images(img_dir, os.path.join(out_dir, 'train'), filename_train)
    _copy_images(img_dir, os.path.join(out_dir, 'test'), filename_test)
    _write_label_files(label_path, out_dir, filename_train, filename_test)
    return filename_train, filename_test


def main(argv=None):
    """Parse the command line (``sys.argv`` when ``argv`` is None) and run the split."""
    args = build_parser().parse_args(argv)
    split_dataset(args.img_dir, args.label_path, args.out_dir, args.lite_rate, args.isMix)


if __name__ == "__main__":
    main()
4:open的2种打开方式
第一种
# Manual open: the handle must be closed explicitly, so the writes are wrapped
# in try/finally to release it even when a write fails.
fp1 = open(train_label, 'w', encoding='utf-8')
try:
    fp1.write(text)      # write one whole string
    fp1.write(lines[0])  # write a single element taken from a list of strings
finally:
    fp1.close()
第二种
# Context-manager form: the file is closed automatically when the block exits.
# The body reads, so the file is opened with mode 'r'; mode 'w' would truncate
# the file and make read() fail.
with open(train_label, 'r', encoding='utf-8') as fp1:
    lines = fp1.read().strip().split('\n')
    for line in lines:
        print(line)
5:创建目录
# Create new_path together with any missing parent directories. This raises
# FileExistsError when new_path already exists (exist_ok defaults to False).
os.makedirs(new_path)
6:创建文件并打开
# Mode 'w' creates the file when it is missing and truncates it otherwise;
# the returned handle stays open until fp1.close() is called.
fp1 = open(path,'w',encoding='utf-8')
7:遍历循环多级目录
# Visit every directory below old_path; os.walk yields one
# (directory, sub-directory names, file names) triple per directory.
for root_dir, sub_dirs, _plain_files in os.walk(old_path):
    for sub_dir in sub_dirs:
        # 8: get all entries of the current sub-directory
        current_dir = os.path.join(root_dir, sub_dir)
        # keep only the names that end with 'jpg'
        file_names = [name for name in os.listdir(current_dir) if name.endswith('jpg')]
9:打乱顺序
# Shuffle file_names in place; random.shuffle returns None, so its result
# must not be assigned back to the list.
random.shuffle(file_names)
10:判断目录是否存在
# Evaluates to True when nothing (file or directory) exists at sub_path.
not os.path.exists(sub_path)
11:复制文件
# Copy the file oldpath to newpath, which may be a target file name or an
# existing directory. Raises shutil.SameFileError when both are the same file.
shutil.copy(oldpath,newpath)