Dataset | CIFAR-100

Download and Description

1.1 official website

Loaders

1.1 code directory

[Image placeholder: code directory layout]

  • images: output folder of the dataset.

1.2 simple loader

import numpy as np
import os
import matplotlib.pyplot as plt


# Folder that holds the CIFAR-100 files read by the loaders below.
data_dir_cifar100 = "./cifar-100-python"
# Load the "meta" file from that folder; allow_pickle=True lets np.load
# unpickle it (presumably it carries the label names -- confirm against the file).
class_names_cifar100 = np.load(
    os.path.join(data_dir_cifar100, "meta"),
    allow_pickle=True,
)


def one_hot(x, n):
    """
    Turn a 1-D sequence of class indices into one-hot rows.

    :param x: 1-D sequence of integer class indices.
    :param n: number of classes, i.e. the width of every one-hot row.
    :return: float array of shape (len(x), n) with a single 1.0 per row.
    """
    indices = np.array(x)
    assert indices.ndim == 1
    # Row i of the identity matrix is exactly the one-hot vector for class i.
    identity = np.eye(n)
    return identity[indices]


def _grayscale(a):
    """
    Average the three colour planes of flattened 3x32x32 images.

    :param a: array whose rows can each be reshaped to (3, 32, 32).
    :return: array with one flattened 32x32 channel-mean image per row.
    """
    n_images = a.shape[0]
    channels_first = a.reshape(n_images, 3, 32, 32)
    gray = channels_first.mean(axis=1)  # collapse the channel axis
    return gray.reshape(n_images, -1)


def _load_batch_cifar100(filename, dtype='float64'):
    """
    Read one batch file from ``data_dir_cifar100``.

    :param filename: name of the batch file inside ``data_dir_cifar100``.
    :param dtype: dtype both returned arrays are cast to.
    :return: tuple ``(data, labels)``; ``data`` is the batch's ``'data'`` entry
        divided by 255.0, ``labels`` is the one-hot encoding (100 classes) of
        its ``'fine_labels'`` entry.
    """
    batch_path = os.path.join(data_dir_cifar100, filename)
    raw = np.load(batch_path, allow_pickle=True, encoding="latin1")
    pixels = raw['data'] / 255.0
    targets = one_hot(raw['fine_labels'], n=100)
    return pixels.astype(dtype), targets.astype(dtype)


def cifar100(data_type="train", dtype='float64', grayscale=True):
    """
    Load a CIFAR-100 split as ``(images, one_hot_labels)``.

    :param data_type: name of the batch file to load (default ``"train"``).
    :param dtype: dtype of the returned arrays.
    :param grayscale: when true, images are reduced to their channel mean.
    :return: tuple ``(x, y)`` of image rows and one-hot label rows.
    """
    features, targets = _load_batch_cifar100(data_type, dtype=dtype)
    if not grayscale:
        return features, targets
    return _grayscale(features), targets


# Demo: load the training split (grayscale by default), report its size,
# print the first one-hot label and display the first image.
x_train, y_train = cifar100()
print(
    "training dataset size = %d, label size = %d"
    % (len(x_train), len(y_train))
)
print(y_train[0])
# Each grayscale row is a flattened 32x32 image; restore 2-D before plotting.
plt.imshow(x_train[0].reshape(32, 32), cmap='gray')
plt.show()

1.3 torch loader

import numpy as np
import torch
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torchvision import datasets
from torch.utils.data.sampler import SubsetRandomSampler


# Root directory handed to torchvision's CIFAR100 dataset below.
data_dir_cifar100 = "./"

# Load the data.
transform = transforms.Compose([
    # ToTensor converts each loaded image to a Tensor and rescales the pixel
    # range from [0, 255] to [0.0, 1.0].
    transforms.ToTensor(),
    # Per-channel normalization, channel = (channel - mean) / std. Since
    # ToTensor already produced values in [0, 1], (x - 0.5) / 0.5 maps them
    # to [-1.0, 1.0].
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
train_data = datasets.CIFAR100(
    data_dir_cifar100,
    train=True,
    download=False,
    transform=transform,
)

# Build the list of sample indices.
num_train = len(train_data)  # number of training samples
indices = list(range(num_train))  # [0, 1, ..., num_train - 1]

# Select the samples used for training.
# np.floor returns the largest integer not greater than its argument, so
# `split` is one fifth of the training-set size.
split = int(np.floor(0.2 * num_train))
# Skip the first fifth of the indices; the remaining four fifths are used.
train_idx = indices[split:]

# Sample from the chosen indices in random order, without replacement.
train_sampler = SubsetRandomSampler(train_idx)

# Configure the loader.
train_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=20,
    sampler=train_sampler,
    num_workers=0,
)

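# Class names for the labels.
# NOTE: the commented-out list below is the CIFAR-10 class list; it does not apply to CIFAR-100 and is kept for reference only.
# classes = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']  # the class order is fixed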


# Show a single image.
def EachImg(img):
    """
    Draw one normalized image with ``plt.imshow``.

    :param img: array laid out as (channels, height, width) with values in
        [-1.0, 1.0], as produced by the Normalize((0.5,...), (0.5,...)) transform.
    """
    # Undo the normalization: [-1.0, 1.0] -> [0.0, 1.0], the range imshow
    # needs to render the colours with normal contrast.
    unnormalized = img / 2 + 0.5
    # plt.imshow expects (height, width, channels), so move the channel axis
    # from the front to the back before drawing.
    channels_last = np.transpose(unnormalized, (1, 2, 0))
    plt.imshow(channels_last)


# Show the first 20 images together with their labels.
dataiter = iter(train_loader)  # start iterating batch by batch
# Use the built-in next(): Python 3 iterators only guarantee __next__, and
# DataLoader iterators in current PyTorch no longer offer a .next() method.
# With batch_size=20: images.shape == [20, 3, 32, 32], labels.shape == [20].
images, labels = next(dataiter)
images = images.numpy()  # convert the tensor batch to a NumPy array
fig = plt.figure(figsize=(25, 4))  # canvas 25 wide, 4 high
for idx in range(20):
    # The canvas is a 2 x 10 grid; draw into cell number idx + 1.
    ax = fig.add_subplot(2, 10, idx + 1, xticks=[], yticks=[])
    EachImg(images[idx])  # draw image number idx
    # .item() yields the plain integer class index, so the title reads e.g.
    # "19" rather than "tensor(19)".
    ax.set_title(str(labels[idx].item()))
plt.show()

1.4 pickle loader

# -*- coding:utf-8 -*-
import os
import pickle as p
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as plimg
from PIL import Image
from tqdm import tqdm


def load_CIFAR_batch(filename):
    """Load a single CIFAR-100 batch file.

    :param filename: path of the pickled batch file (e.g. the ``train`` file).
    :return: tuple ``(x, y, file_names)`` where ``x`` is the image data
        reshaped to (num_images, 3, 32, 32), ``y`` is an array of the fine
        labels and ``file_names`` is the batch's list of image file names
        (bytes, because the pickle is read with ``encoding='bytes'``).
    """
    with open(filename, 'rb') as f:
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        datadict = p.load(f, encoding='bytes')
    # -1 lets NumPy infer the image count, so batches of any size load
    # instead of only files holding exactly 50000 images.
    x = datadict[b'data'].reshape(-1, 3, 32, 32)
    y = np.array(datadict[b'fine_labels'])
    file_names = datadict[b'filenames']
    return x, y, file_names


if __name__ == "__main__":
    # Export every training image as a PNG under ./train/<fine label>/.
    x_train, y_train, file_names = load_CIFAR_batch("./cifar-100-python/train")
    print("loading...")
    samples = zip(x_train, y_train, file_names)
    for imgs, label, file_name in tqdm(samples, total=x_train.shape[0]):
        # Merge the three single-channel planes into one RGB image.
        img = Image.merge("RGB", [Image.fromarray(channel) for channel in imgs])

        # Save the image into the folder named after its label.
        file_path = "./train/" + str(label)
        # exist_ok=True replaces the separate exists() check, which could race
        # with another process creating the same folder.
        os.makedirs(file_path, exist_ok=True)
        # File names come back as bytes from the pickle; decode before use.
        img.save(file_path + "/" + str(file_name, encoding='utf8'), "png")

    print("done.")


Reference

  1. disanda’s blog
  2. Vivinia_Vivinia’s blog
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值