# Original article: https://blog.csdn.net/ls20121006/article/details/78951805
import PIL.Image as Image
from scipy.misc import imsave
import numpy as np
import random
import pickle
import os
# Helper: build a dataset from samples and pickle it to disk
def initPKL(imgSet_shuffle, train_or_test):
    """Pickle a sequence of samples into ``trainSet.pkl`` or ``testSet.pkl``.

    Args:
        imgSet_shuffle: Iterable of samples, each indexable as
            ``sample[0]`` (image), ``sample[1]`` (label) and
            ``sample[2]`` (label name).
        train_or_test: ``'train'`` selects ``trainSet.pkl``; any other value
            selects ``testSet.pkl``.

    The file is written relative to the current working directory and holds
    a 3-tuple ``(images, labels, label_names)`` of NumPy arrays.
    """
    set_name = 'trainSet.pkl' if train_or_test == 'train' else 'testSet.pkl'

    imgSet = []
    labels = []
    label_names = []
    for sample in imgSet_shuffle:
        imgSet.append(sample[0])
        labels.append(sample[1])
        label_names.append(sample[2])

    # Convert before opening the file so a failed conversion does not leave
    # an empty pickle behind.
    data = (np.array(imgSet), np.array(labels), np.array(label_names))

    # Write to file; the context manager closes the handle even if the dump
    # raises.
    with open(set_name, 'wb') as output:
        pickle.dump(data, output)
def initArr(folders_path, num_classes=None):
    """Load every image under the class sub-folders of ``folders_path``.

    Each immediate sub-directory of ``folders_path`` is treated as one class.
    Sub-directories are visited in sorted order so that a given class name
    maps to the same one-hot index on every call (``os.listdir`` order is
    arbitrary). Entries that are not directories are ignored.

    Args:
        folders_path: Directory containing one sub-directory per class.
            A trailing path separator is optional.
        num_classes: Length of the one-hot label. Defaults to the number of
            class sub-directories found.

    Returns:
        A list of ``(img_arr, label, folder)`` tuples where ``img_arr`` is
        the image as a NumPy array divided by 255, ``label`` is a one-hot
        list, and ``folder`` is the class sub-directory name.

    Raises:
        ValueError: If ``num_classes`` is smaller than the number of class
            sub-directories.
    """
    folders = sorted(
        name for name in os.listdir(folders_path)
        if os.path.isdir(os.path.join(folders_path, name))
    )

    if num_classes is None:
        num_classes = len(folders)
    elif num_classes < len(folders):
        raise ValueError(
            'num_classes=%d is smaller than the %d class folders found'
            % (num_classes, len(folders))
        )

    imgSet = []
    for class_index, folder in enumerate(folders):
        # One-hot label sized to the number of classes; the same list object
        # is shared by every image of this class.
        label = [0] * num_classes
        label[class_index] = 1

        folder_path = os.path.join(folders_path, folder)
        for file_name in os.listdir(folder_path):
            # Read the image; the context manager releases the file handle
            # once the pixel data has been copied into the array.
            with Image.open(os.path.join(folder_path, file_name)) as img:
                img_arr = np.array(img) / 255
            imgSet.append((img_arr, label, folder))
    return imgSet
# Convert the image folders into lists of (image, label, class_name) samples
train_folders_path = 'E:/workFolder/data/cifar/cifar_10/train/'
test_folders_path = 'E:/workFolder/data/cifar/cifar_10/test/'
train_imgSet = initArr(train_folders_path)
test_imgSet = initArr(test_folders_path)
# Shuffle the sample order in place
random.shuffle(train_imgSet)
random.shuffle(test_imgSet)
# Each sample is a heterogeneous (ndarray, list, str) tuple; wrapping the
# list in np.array would build a ragged array, which current NumPy rejects.
# initPKL only iterates and indexes the samples, so the lists are passed
# through unchanged.
train_set_shuffle = train_imgSet
test_set_shuffle = test_imgSet
# Generate the training set and the test set
initPKL(train_set_shuffle, 'train')
initPKL(test_set_shuffle, 'test')
# Sanity-check the generated data set by reading it back.
# NOTE: pickle.load is only safe here because the file was just written by
# this script; never unpickle data from an untrusted source.
with open('./trainSet.pkl', 'rb') as f:
    x, y, z = pickle.load(f)
print(np.shape(x[3]), y[3], z[3])