CIFAR-10数据集
CIFAR-10和CIFAR-100是来自于80 million张小型图片的数据集,图片收集者是Alex Krizhevsky, Vinod Nair, and Geoffrey Hinton。
官网 http://www.cs.toronto.edu/~kriz/cifar.html
测试集和验证集效果:
CIFAR-10数据集
#encoding:utf-8
from cv2 import imwrite as imsave
import numpy as np
import os
# 解压缩,返回解压后的字典
def unpickle(file):
import pickle
with open(file, 'rb') as fo:
dicts = pickle.load(fo, encoding='bytes')
return dicts
#创建文件夹
date_dict = {0:"airplane",1:"automobile",2:"bird",3:"cat",4:"deer",5:"dog",6:"frog",7:"horse",8:"ship",9:"truck"}
for i in range(len(date_dict)):
path1 = f"train/{date_dict[i]}"
path2 = f"test/{date_dict[i]}"
if not os.path.isdir(path1):
os.makedirs(path1)
if not os.path.isdir(path2):
os.makedirs(path2)
#生成训练集图片,如果需要jpg格式,只需要改图片后缀名即可。
for j in range(1, 6):
dataName = "data_batch_" + str(j) # 读取当前目录下的data_batch12345文件,dataName其实也是data_batch文件的路径,本文和脚本文件在同一目录下。
Xtr = unpickle(dataName)
print(dataName + " is loading...")
for i in range(0, 10000):
img = np.reshape(Xtr[b'data'][i], (3, 32, 32)) # Xtr['data']为图片二进制数据
img = img.transpose(1, 2, 0) # 读取image
picName = 'train/' + str(date_dict[Xtr[b'labels'][i]])+"/"+str(i + (j - 1)*10000) + '.png' # Xtr['labels']为图片的标签,值范围0-9,本文中,train文件夹需要存在,并与脚本文件在同一目录下。
imsave(picName, img)
print (dataName + " loaded.")
print("test_batch is loading...")
# 生成测试集图片
testXtr = unpickle("test_batch")
for i in range(0, 10000):
img = np.reshape(testXtr[b'data'][i], (3, 32, 32))
img = img.transpose(1, 2, 0)
picName = 'test/' + str(date_dict[testXtr[b'labels'][i]]) + '/' + str(i) + '.png'
imsave(picName, img)
print("test_batch loaded.")
参考链接:https://blog.csdn.net/guohuifengby/article/details/62424299
参考链接:https://blog.csdn.net/weixin_44633882/article/details/86905285