# The code is below; just change the dataset paths (train_path / validate_path) to point at your data.
import numpy as np
import os
import shutil
def read_txt(full_path: str) -> dict:
    """Map image file names to the labels listed in a label file.

    The n-th line of the file (1-based) is taken as the label of the image
    named ``"<n>.jpg"``.

    Args:
        full_path: Path of the text file holding one label per line.

    Returns:
        A dict mapping ``"<n>.jpg"`` to the text of line n with its trailing
        newline removed.
    """
    # The context manager closes the file even if reading fails.
    with open(full_path) as label_file:
        labels = {}
        for index, line in enumerate(label_file, start=1):
            # Strip only a newline that is actually there: the last line of
            # the file may not end with one, and blindly dropping the final
            # character would truncate its label (e.g. "-1" -> "-").
            labels[str(index) + '.jpg'] = line.rstrip('\n')
        return labels
# Process the dataset according to its label .txt file, creating one folder
# per category.
def deal_train(path):
    """Copy labelled ``.jpg`` images of a directory into per-category folders.

    The first ``.txt`` file found directly inside ``path`` is read with
    ``read_txt`` to obtain a mapping from image file name to label. Every
    ``.jpg`` file directly inside ``path`` whose label is ``'1'`` or ``'-1'``
    is copied into a sub-folder of ``path`` named after that label; the
    folder is created when missing and files already present there are left
    untouched. Images with any other label, or with no label at all, are
    skipped.

    Args:
        path: Directory containing the label file and the images. May be
            relative (resolved against the current working directory) or
            absolute.
    """
    # Resolve the directory once instead of slicing off a presumed "./"
    # prefix, so absolute paths and plain relative paths work as well.
    root = os.path.abspath(path)

    # Only the top level is inspected: the category folders created below
    # live inside ``root`` and must not be processed again on a later run.
    # A missing directory yields no entries, so nothing is done for it.
    _, _, files = next(os.walk(root), (root, [], []))

    label_dict = {}
    for file in files:
        if os.path.splitext(file)[1] == '.txt':
            label_dict = read_txt(os.path.join(root, file))
            break

    for file in files:
        if os.path.splitext(file)[1] != '.jpg':
            continue
        # ``get`` keeps an image without a label entry from aborting the run.
        category = label_dict.get(file)
        if category not in ('1', '-1'):
            continue
        target_dir = os.path.join(root, category)
        os.makedirs(target_dir, exist_ok=True)
        target = os.path.join(target_dir, file)
        # Copy only when the target folder does not already hold the file.
        if not os.path.exists(target):
            shutil.copy(os.path.join(root, file), target)
# Dataset directories to process; adjust these to where the data lives.
train_path = './data/train'
validate_path = './data/validate'

if __name__ == '__main__':
    # Only the validation split is processed here; uncomment the next line
    # to sort the training split into category folders as well.
    # deal_train(train_path)
    deal_train(validate_path)