import pickle
import numpy as np
from mindspore.dataset import GeneratorDataset
def setup():
def load(fileName: str):
with open(file=fileName, mode="rb") as handler:
data = pickle.load(file=handler, encoding="latin1")
return data
trains = load("D:/dataset/cifar-100-python/train")
tests = load("D:/dataset/cifar-100-python/test")
metas = load("D:/dataset/cifar-100-python/meta")
return trains, tests, metas
def train():
    """Print a quick inspection report of the raw CIFAR-100 pickles.

    Loads the three dictionaries through ``setup()`` and writes to stdout:
    the first training sample (data row, coarse label, fine label), the
    number of coarse training labels, and then, for each dictionary, the
    length of the value stored under every key.
    """
    trains, tests, metas = setup()

    first_row = trains["data"][0]
    first_coarse = trains["coarse_labels"][0]
    first_fine = trains["fine_labels"][0]
    print(first_row, first_coarse, first_fine)

    coarse_count = len(trains["coarse_labels"])
    print(coarse_count)

    # One line per key; the three sections are separated by a dashed rule.
    for key, value in trains.items():
        print(f"key = {key}, len(trains[key]) = {len(value)}")
    print("--------------------------------------------------")
    for key, value in tests.items():
        print(f"key = {key}, len(tests[key]) = {len(value)}")
    print("--------------------------------------------------")
    for key, value in metas.items():
        print(f"key = {key}, len(metas[key]) = {len(value)}")
#
class LoadCifar100Dataset():
    """Random-access source over the pickled CIFAR-100 train or test split.

    The object implements ``__len__`` and ``__getitem__`` so it can be
    handed to a generator-style dataset wrapper. Every item is a tuple
    ``(image, coarse_label, fine_label)`` where ``image`` is the stored data
    row converted to ``float32`` and reshaped to ``(3, 32, 32)``.

    Args:
        type: ``"train"`` selects the training split; any other value
            selects the test split. (The name shadows the builtin but is
            kept so existing keyword callers do not break.)
        root: Optional directory containing the ``train`` and ``test``
            pickle files. When omitted, the class-level default path is
            used, which is the location that used to be hard-coded.
    """

    # Default dataset directory; overridden per instance through ``root``.
    root = "D:/dataset/cifar-100-python"

    def __init__(self, type, root=None):
        self.type = type
        if root is not None:
            self.root = root
        # Both splits are loaded and kept as attributes regardless of
        # ``type`` so code reading ``.trains`` / ``.tests`` keeps working.
        self.trains, self.tests = self.setup()

    def _active_split(self):
        """Return the dictionary selected by the current ``self.type``."""
        # Evaluated on every call so changing ``self.type`` later still
        # switches the split.
        return self.trains if self.type == "train" else self.tests

    def __len__(self):
        """Return the number of samples (coarse labels) in the split."""
        return len(self._active_split()["coarse_labels"])

    def __getitem__(self, idx):
        """Return ``(image, coarse_label, fine_label)`` for sample ``idx``."""
        split = self._active_split()
        # The flat data row is converted to float32, then viewed as 3x32x32.
        image = np.reshape(split["data"][idx].astype(np.float32), (3, 32, 32))
        # A tuple (not a dict) is returned: one element per output column.
        return (image, split["coarse_labels"][idx], split["fine_labels"][idx])

    def setup(self):
        """Unpickle and return ``(trains, tests)`` from ``self.root``.

        Raises:
            OSError: If either file cannot be opened.
        """

        def load(fileName: str):
            # SECURITY: unpickling can execute arbitrary code. Only load
            # dataset files obtained from a trusted source.
            with open(fileName, "rb") as handler:
                return pickle.load(handler, encoding="latin1")

        trains = load(f"{self.root}/train")
        tests = load(f"{self.root}/test")
        return trains, tests
if __name__ == '__main__':
    # Smoke test: wrap the training split in a GeneratorDataset, batch it by
    # 10, then print the batch count followed by each image batch's shape.
    # train()  # uncomment to dump a summary of the raw pickles instead
    cifar100_train_dataset = LoadCifar100Dataset("train")
    cifar100_test_dataset = LoadCifar100Dataset("test")

    columns = ["image", "coarse_labels", "fine_labels"]
    train_generator = GeneratorDataset(
        cifar100_train_dataset, column_names=columns
    ).batch(10)

    print(train_generator.get_dataset_size())

    for batch in train_generator.create_tuple_iterator():
        # Each batch carries one entry per declared column.
        imgs, coarse_labels, fine_labels = batch
        print(imgs.shape)
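# Dataset download:
# Open https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz in a browser,
# download the archive to your local machine and extract it; the loaders
# above read the `train`, `test` and `meta` files from the extracted
# `cifar-100-python` directory.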