参考文章:https://cloud.tencent.com/developer/article/1761815
以natureDB为例
natureDB 是一个自然图像数据集,一共有十个类别,每个类别有 100 张照片。
利用脚本将图像的类别和路径存入 txt 文件(根据自己的目录结构写一个即可)。每行的格式为“类别 路径”,中间用一个空格分隔,例如 `3 cat_001.jpg`。
class nature_dataset():
    """Image-pair dataset built from a ``label path`` list file.

    Each item is a pair of images plus a binary pair label: ``0`` when both
    images belong to the same class and ``1`` when they belong to different
    classes.

    The class relies on ``np`` (numpy) and ``Image`` being available at
    module level; ``Image`` is presumably ``PIL.Image`` -- confirm against
    the imports of the surrounding file.
    """

    def __init__(self, transform = None, train = True):
        """
        :param transform: optional callable applied to both images of a pair
            (e.g. transform_train / transform_test); ``None`` returns the
            images exactly as opened
        :param train: truthy reads ``./train.txt``, falsy reads ``./test.txt``
        """
        list_path = "./train.txt" if train else "./test.txt"
        imgs = []
        # The context manager closes the list file even if parsing fails.
        with open(list_path, "r") as f:
            for line in f:
                line = line.strip("\n")
                if not line.strip():
                    # Tolerate blank lines (typically a trailing newline).
                    continue
                word = line.split(" ")
                imgs.append((word[1], int(word[0])))
        self.imgs = imgs
        # Labels as an array so that ``self.labels == label`` yields a boolean
        # mask usable for indexing in _sample_partner_index.
        self.labels = np.array([label for _, label in imgs], dtype=int)
        self.transform = transform

    def _sample_partner_index(self, label, same):
        """Return a random sample index whose class matches ``label`` if
        ``same`` is true, or differs from it otherwise.

        :raises ValueError: from ``np.random.choice`` when no sample
            satisfies the requested condition
        """
        mask = (self.labels == label) if same else (self.labels != label)
        candidates = np.arange(len(self.imgs))[mask]
        # choice(..., 1) returns a 1-element array; unwrap to a plain int so
        # it can index the Python list self.imgs.
        return int(np.random.choice(candidates, 1)[0])

    def __getitem__(self, index):
        """Return ``(img, img_1, pair_label)`` for the sample at ``index``.

        The second image is drawn from the same class with probability 0.5
        and from a different class otherwise. ``pair_label`` is 0 for a
        same-class pair and 1 for a different-class pair.
        """
        fn, label = self.imgs[index]
        img = Image.open("./images/" + fn)

        want_same = np.random.rand() < 0.5
        index_1 = self._sample_partner_index(label, want_same)
        fn_1, label_1 = self.imgs[index_1]
        img_1 = Image.open("./images/" + fn_1)

        if self.transform is not None:
            img = self.transform(img)
            img_1 = self.transform(img_1)

        pair_label = 0 if label == label_1 else 1
        return img, img_1, pair_label

    def __len__(self):
        """Return the number of samples listed in the list file."""
        return len(self.imgs)
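第一次看到这种语法……仔细想想,其实就是 numpy 的布尔索引:`y == y1` 会得到一个与 `y` 等长的布尔数组,用它去索引 `np.arange(len(y))`,相当于 `arr[[True, False, …, True]]`,只保留值为 True 的那些位置(也就是满足条件的下标)。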
import numpy as np
# Demo of boolean-mask indexing: pick a random position whose value equals
# (or differs from) a target value.
y = np.array([1, 1, 2, 2, 3, 3, 4, 4])
y1 = 1

positions = np.arange(len(y))   # every index of y: 0 .. len(y) - 1
same_mask = y == y1             # True where the element equals y1
diff_mask = y != y1             # True where the element differs from y1

# One random index among the positions holding y1.
idx2 = np.random.choice(positions[same_mask], size=1)
print(idx2)

# One random index among the positions holding any other value.
idx3 = np.random.choice(positions[diff_mask], size=1)

print(positions)
print(positions[same_mask])
print(positions[diff_mask])
[1]
[0 1 2 3 4 5 6 7]
[0 1]
[2 3 4 5 6 7]