制作自己的数据集
class MyDataset(Dataset):
    """Image / label dataset read from a single directory.

    Every file in ``image_Path`` whose extension is ``jpg`` is treated as an
    image and every other file in that directory as a label file. Both lists
    are built from the sorted directory listing and are paired by position.
    ``label_Path`` is a JSON file whose parsed content is kept in
    ``self.classes``.

    Args:
        image_Path: Directory with the images and the per-image label files;
            joined onto the current working directory.
        label_Path: JSON file loaded into ``self.classes``; joined onto the
            current working directory.
        img_transform: Used as a flag only. When truthy, the fixed
            resize / crop / to-tensor / normalize pipeline below is applied.
            NOTE(review): the object passed here is never called itself -
            confirm that this is intended.
        target_transform: Used as a flag only. When truthy, the label token
            is converted to ``int``.
    """

    def __init__(self, image_Path, label_Path, img_transform=None, target_transform=None):
        print(os.getcwd())
        self.image_Path = os.path.join(os.getcwd(), image_Path)
        self.label_Path = os.path.join(os.getcwd(), label_Path)
        with open(self.label_Path, 'r', encoding='UTF-8') as f:
            self.classes = json.load(f)
        self.imgs = []    # image file paths
        self.labels = []  # label file paths
        for file_name in sorted(os.listdir(self.image_Path)):
            full_path = os.path.join(self.image_Path, file_name)
            if file_name.split('.')[-1] == 'jpg':
                self.imgs.append(full_path)
            else:
                self.labels.append(full_path)
        self.img_transform = img_transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of images; needed by ``len()`` and DataLoader."""
        return len(self.imgs)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at position ``index``.

        The label is the second space-separated token of the first line of
        the label file; it stays a string unless ``target_transform`` is
        truthy.
        """
        image = Image.open(self.imgs[index])
        # Read the first line inside a context manager so the handle is
        # closed instead of leaking one descriptor per sample.
        with open(self.labels[index], "r") as label_txt:
            line_content = label_txt.readline()
        label = line_content.split(' ')[1]
        if self.img_transform:
            transform = transforms.Compose([
                transforms.Resize((400, 400)),
                transforms.CenterCrop(400),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ])
            image = transform(image)
        if self.target_transform:
            label = int(label)
        return image, label
文件重命名
import glob
import shutil
import os
# Give every entry of old_dir a ".txt" extension, keeping only the part of its
# name that precedes the first dot.
old_dir = "./label"
# os.path.join yields "./label/*". The earlier old_dir + "./*" produced
# "./label./*", a pattern that names a different directory ("label.").
file_list = glob.glob(os.path.join(old_dir, "*"))
for file in file_list:
    ori_name = os.path.split(file)[1]
    new_name1 = ori_name.split(".")[0]
    new_name2 = new_name1 + ".txt"
    target = os.path.join(old_dir, new_name2)
    if os.path.abspath(target) == os.path.abspath(file):
        # Already has the wanted name.
        continue
    if os.path.exists(target):
        # Same exception type shutil.move raised for a name collision.
        raise shutil.Error("Destination path '%s' already exists" % target)
    # Rename inside old_dir directly instead of renaming into the current
    # working directory and moving back, which could overwrite an unrelated
    # file of the same name in the working directory.
    os.rename(file, target)
print("ok")
移动文件
import glob
import shutil
import os
# Move the content of every sub-folder of filePath into the single folder
# out_path.
filePath = "./data/anno"
out_path = './data/dark_anno_train'


def _ensure_dir(path):
    """Create path with os.mkdir unless something already exists there."""
    if os.path.exists(path):
        return
    os.mkdir(path)


_ensure_dir(out_path)
file_list = os.listdir(filePath)
for class_name in file_list:
    # Rebinding file_list here does not disturb the running loop: the for
    # statement keeps iterating over the original listing.
    file_list = glob.glob("/".join((filePath, class_name, "*")))
    move_dir = out_path
    _ensure_dir(move_dir)
    print(file_list)
    for input_file in file_list:
        shutil.move(input_file, move_dir)
文件按条件筛选
from PIL import Image
import glob
import numpy as np
import shutil
import os
# Move every image that is not a plain RGB picture of at most 2000 x 2000
# pixels from ./data/dark/<class>/ to ./data/out/<class>/.
filePath = "./data/dark"
out_path = './data/out'
os.makedirs(out_path, exist_ok=True)
for class_name in os.listdir(filePath):
    file_list = glob.glob(filePath + "/" + class_name + "/*")
    move_dir = os.path.join(out_path, class_name)
    os.makedirs(move_dir, exist_ok=True)
    print(file_list)
    for input_file in file_list:
        # Collect everything needed while the file is open, then close it
        # before moving: an open handle leaks a descriptor per image and
        # blocks the move on platforms that lock open files.
        with Image.open(input_file) as image1:
            image = np.array(image1)
            size = image1.size
            mode = image1.mode
        if len(image.shape) != 3 or size[0] > 2000 or size[1] > 2000 or mode != "RGB":
            print(len(image.shape))
            print(size)
            shutil.move(input_file, move_dir)
文件夹切分
import os
import shutil
def mv_file(img, num, class_name):
    """Copy the files of one folder into numbered chunk folders.

    The sorted content of ``img`` is cut into consecutive chunks of ``num``
    entries. Chunk ``n`` (1-based) is copied to
    ``<grandparent of img>/temp/<class_name>_<n>``. An (empty) sibling folder
    ``<img>_<n>`` is created as well, as before.
    NOTE(review): the ``<img>_<n>`` folders never keep any file - confirm
    whether they are still wanted.

    Args:
        img: Folder whose entries are split.
        num: Maximum number of entries per chunk folder; must be positive and
            not larger than the number of entries in ``img``.
        class_name: Prefix of the chunk folder names below ``temp``.

    Raises:
        ValueError: If ``num`` is not positive.
        SystemExit: If ``num`` exceeds the number of entries, or a folder that
            would be created already exists.
    """
    if num <= 0:
        raise ValueError('num must be a positive integer')
    # Sorted so that the chunk contents are reproducible between runs.
    list_ = sorted(os.listdir(img))
    if num > len(list_):
        print('长度需小于:', len(list_))
        raise SystemExit(1)
    # Ceiling division: int(len / num) + 1 produced one extra, empty chunk
    # folder whenever the entry count was an exact multiple of num.
    num_file = -(-len(list_) // num)
    temp_root = os.path.join(os.path.dirname(os.path.dirname(img)), "temp")
    for n in range(1, num_file + 1):
        new_file = img + '_' + str(n)
        move_file = os.path.join(temp_root, class_name + '_' + str(n))
        # Test the folders that are about to be created (the earlier check
        # looked at "<folder>_<counter>", which is never created).
        if os.path.exists(new_file):
            print('该路径已存在,请解决冲突', new_file)
            raise SystemExit(1)
        if os.path.exists(move_file):
            print('该路径已存在,请解决冲突', move_file)
            raise SystemExit(1)
        print('创建文件夹:', new_file)
        print('创建文件夹:', move_file)
        os.mkdir(new_file)
        # makedirs also creates the shared "temp" parent on first use.
        os.makedirs(move_file)
        for m in list_[num * (n - 1):num * n]:
            # Copying straight to the destination has the same result as
            # copying into new_file and then moving the copy out again.
            shutil.copy(os.path.join(img, m), os.path.join(move_file, m))
    print('============task OK!===========')
if __name__ == "__main__":
    # Run mv_file on every entry of ./data/dark with 10 files per chunk
    # folder; the entry name doubles as the class name.
    filePath = "./data/dark"
    file_list = os.listdir(filePath)
    for class_name in file_list:
        class_path = "/".join([filePath, class_name])
        print(class_name, class_path, sep="\n")
        mv_file(class_path, 10, class_name)
目标检测 数据集转换
from PIL import Image
import glob
import numpy as np
import shutil
import os
# Folder holding the images and folder holding their annotation files.
img_dir = "./data/darktrain"
anno_dir = "./data/dark_anno_train"
# Every entry directly inside the image folder.
file_list = glob.glob("{}/*".format(img_dir))
def str2num(s):
    """Return the integer class id that belongs to the class name ``s``.

    Ids follow the position of the name in the tuple below, starting at 0.
    An unknown name raises ``KeyError``.
    """
    class_names = ('Bicycle', 'Boat', 'Bottle', 'Bus', 'Car', 'Cat',
                   'Chair', 'Cup', 'Dog', 'Motorbike', 'People', 'Table')
    lookup = {name: idx for idx, name in enumerate(class_names)}
    return lookup[s]
# Rewrite each annotation file in place. Every non-comment line
# "<class> <left> <top> <width> <height> ..." becomes
# "<class id> <x center> <y center> <width> <height>", with the four numbers
# divided by the image width / height.
# NOTE: the files are overwritten, so running this twice fails in str2num
# (before anything is written) because the class column is then numeric.
for file in file_list:
    # Only the size is needed; close the image right away instead of leaking
    # one handle per file.
    with Image.open(file) as img:
        w, h = img.size
    file_anno = os.path.split(file)[1]
    anno = os.path.join(anno_dir, file_anno + ".txt")
    converted = []
    with open(anno, "r") as f:
        for line in f:
            splited = line.strip().split()
            # Skip '%' comment lines and blank lines (a blank line used to
            # raise IndexError).
            if not splited or line.startswith('%'):
                continue
            c = str2num(splited[0])          # class id
            box_w = float(splited[3])        # box width in pixels
            box_h = float(splited[4])        # box height in pixels
            x = (float(splited[1]) + box_w / 2) / w   # x center
            y = (float(splited[2]) + box_h / 2) / h   # y center
            values = (c, x, y, box_w / w, box_h / h)
            converted.append(' '.join(str(v) for v in values) + '\n')
    with open(anno, 'w') as f:
        f.write(''.join(converted))
按csv文件名移动
import glob
import shutil
import os
import numpy as np
import pandas as pd
# Move the image / label pairs listed in test.csv (column 0: image file name,
# column 1: label file name) into separate test folders.
images_path = "./images"
labels_path = "./labels"
out_images_path = './test_images'
out_labels_path = './test_labels'
csv_path = "./test.csv"
os.makedirs(out_images_path, exist_ok=True)
os.makedirs(out_labels_path, exist_ok=True)
with open(csv_path, encoding='utf-8') as f:
    # ndmin=2 keeps a one-row CSV two-dimensional; without it the result is
    # 1-D and row[0] would be a single character of the file name.
    train_data = np.loadtxt(f, str, delimiter=",", ndmin=2)
# The with block has already closed the file; no explicit close() is needed.
for row in train_data:
    if len(row) < 2:
        # Nothing to pair up (e.g. an empty CSV).
        continue
    image_p = os.path.join(images_path, row[0])
    shutil.move(image_p, out_images_path)
    label_p = os.path.join(labels_path, row[1])
    shutil.move(label_p, out_labels_path)
# file_list = os.listdir(filePath)
#
# for class_name in file_list:
# # matrix[i] = "filename"
# # print(filePath)
# # print(file_name)
# file_list = glob.glob(filePath + "/" + class_name + "/*")
# move_dir = out_path
# if not os.path.exists(move_dir):
# os.mkdir(move_dir)
# print(file_list)
# for input_file in file_list:
# #temp = os.path.split(input_file)[1]
# shutil.move(input_file,move_dir)
写csv
import os
import csv
# Write data.csv with one "<image file>,<label file>" line per sample.
anno_path = "./data/ExDark/labels"
exdark_path = "./data/ExDark/images"
# os.listdir returns entries in arbitrary order; sort both listings so that
# position i of one list lines up with position i of the other.
exdark_file_list = sorted(os.listdir(exdark_path))
file_anno_list = sorted(os.listdir(anno_path))
if len(exdark_file_list) != len(file_anno_list):
    # Pairing by position is meaningless when the counts differ; before, this
    # ended in an IndexError or silently dropped the extra label files.
    raise ValueError(
        "image / label count mismatch: %d images, %d labels"
        % (len(exdark_file_list), len(file_anno_list))
    )
with open("./data/ExDark/data.csv", "w", newline="") as f:
    for file_name, file_anno_name in zip(exdark_file_list, file_anno_list):
        f.write(file_name + "," + file_anno_name + "\n")
切分csv
#随机切开资料
import csv
import os
import numpy as np
# Randomly split the rows of data.csv into train.csv and test.csv.
# The training share is train_ratio of all rows (rounded down); the remaining
# rows form the test set. An output file that already exists is left alone.
labels = []  # would hold a header row; data.csv has none, so it stays empty
data = []
a_train_file = './data/ExDark/train.csv'
a_test_file = './data/ExDark/test.csv'
a_file = './data/ExDark/data.csv'
seed = 3
train_ratio = 0.8

# Read all rows first so that the split sizes follow the real row count
# instead of hard-coded numbers (7263 rows / 5810 training rows before, which
# is exactly what an 80 % share of 7263 rows gives).
with open(a_file, newline='') as afile:
    data = list(csv.reader(afile))

np.random.seed(seed)
total_rows = len(data)
train_rows = int(total_rows * train_ratio)
train_indices = np.random.choice(total_rows, train_rows, replace=False)
# All row numbers that were not drawn for training, in ascending order.
test_indices = np.setdiff1d(np.arange(total_rows), train_indices)

# Write the training set.
if not os.path.exists(a_train_file):
    with open(a_train_file, "w", newline='') as a_trian:
        writer = csv.writer(a_trian)
        writer.writerows(data[i] for i in train_indices)

# Write the test set.
if not os.path.exists(a_test_file):
    with open(a_test_file, "w", newline='') as a_test:
        writer = csv.writer(a_test)
        writer.writerows(data[i] for i in test_indices)
dataset制作
import pandas as pd
from PIL import Image
import json
import os
import shutil
# Space-separated list without a header row; the first line is dropped and
# the remaining rows keep their original index labels (1, 2, ...).
train_file = "train_list.txt"
file = pd.read_csv(train_file, sep=' ', header=None).iloc[1:]
def label_to_name(num):
    """Return the class name stored under key ``str(num)``.

    The mapping is read from ``garbage_classification.json`` in the current
    working directory on every call. An unknown key raises ``KeyError``.
    """
    # JSON text is UTF-8; naming the encoding keeps non-ASCII class names
    # readable on platforms whose default encoding differs.
    with open("garbage_classification.json", 'r', encoding='utf-8') as f:
        name = json.load(f)
    return name[str(num)]
# Move every listed file into train/<class name>/, creating the folder on
# first use. Column 0 holds the file path, column 1 the numeric label.
for path, label_num in zip(file[0], file[1]):
    label_name = label_to_name(label_num)
    move_dir = os.path.join('train/' + label_name)
    if not os.path.exists(move_dir):
        os.makedirs(move_dir)
    shutil.move(path, move_dir)
image-clef 处理
import glob
import shutil
import os
import numpy as np
# Sort the images in filePath into one sub-folder per class. Each line of the
# list file reads "<image path> <class number>"; only the file name of the
# image path is used.
filePath = "./c"
file_class_path = "./list/cList.txt"
num2dir = {
    0: "aeroplane",
    1: "bike",
    2: "bird",
    3: "boat",
    4: "bottle",
    5: "bus",
    6: "car",
    7: "dog",
    8: "horse",
    9: "monitor",
    10: "motorbike",
    11: "people",
}
with open(file_class_path, encoding='utf-8') as f:
    # ndmin=2 keeps a one-line list two-dimensional; without it row[0] would
    # be a single character.
    file = np.loadtxt(f, str, delimiter=" ", ndmin=2)
for row in file:
    if len(row) < 2:
        # Nothing to sort (e.g. an empty list file).
        continue
    temp = row[0].split("/")[-1]
    # Build both paths from filePath instead of repeating the "./c/" literal.
    file_path = os.path.join(filePath, temp)
    out_path = os.path.join(filePath, num2dir[int(row[1])])
    if not os.path.exists(out_path):
        os.mkdir(out_path)
    if not os.path.exists(file_path):
        # Listed but not present (for instance already moved).
        continue
    shutil.move(file_path, out_path)