将CIFAR10数据集拆分为train、val以及test

最新推荐文章于 2025-04-06 17:12:43 发布

coolsunxu

最新推荐文章于 2025-04-06 17:12:43 发布

阅读量2.7k

点赞数

分类专栏： Python 文章标签： python CIFAR10 densenet

本文链接：https://blog.csdn.net/coolsunxu/article/details/102531046

版权

Python 专栏收录该内容

55 篇文章

订阅专栏

import cv2
import  numpy as np
from pathlib import Path
from tqdm import tqdm

import pickle

# Read the class names first.
# NOTE: pickle.load can execute arbitrary code; only point this at the
# official, trusted CIFAR-10 files.
with open('cifar-10-batches-py/batches.meta', 'rb') as fo:
    dict_meta = pickle.load(fo, encoding='bytes')
# Loading with encoding='bytes' yields bytes objects for the names. Decode
# them once here so that str(name) further down produces the plain class
# name instead of the "b'...'" repr in directory and file names.
label_names = [
    name.decode('utf-8') if isinstance(name, bytes) else name
    for name in dict_meta[b'label_names']
]

# Training and validation sets: the first `single_train_number` images of
# every training batch are written to train/, the rest of the batch to val/.
single_train_number = 9000
train_paths = [
    'cifar-10-batches-py/data_batch_1',
    'cifar-10-batches-py/data_batch_2',
    'cifar-10-batches-py/data_batch_3',
    'cifar-10-batches-py/data_batch_4',
    'cifar-10-batches-py/data_batch_5',
]
# Test set
test_path = 'cifar-10-batches-py/test_batch'
# All split folders (train/, val/, test/) are created below this directory.
output_root = Path('./cifar-10-batches-py')


def _load_batch(batch_path):
    """Unpickle one batch file and return its dictionary (keys are bytes)."""
    # NOTE: pickle.load can execute arbitrary code; only use it on the
    # official, trusted CIFAR-10 files.
    with open(batch_path, 'rb') as fo:
        return pickle.load(fo, encoding='bytes')


def _class_tag(label):
    """Return the '<class name>_<label index>' tag used for folders and files."""
    name = label_names[label]
    # Names loaded with encoding='bytes' are bytes; str() on them would give
    # "b'...'" and leak the repr into the paths.
    if isinstance(name, bytes):
        name = name.decode('utf-8')
    return '{}_{}'.format(name, label)


def _row_to_bgr(row):
    """Convert one flat 3072-value image row into a 32x32x3 BGR array."""
    # Per the CIFAR-10 python layout the row holds the red, green and blue
    # planes back to back (1024 row-major values each). cv2.imwrite expects
    # the channels in B, G, R order, so the planes are stacked reversed.
    red, green, blue = row.reshape(3, 32, 32)
    return np.stack((blue, green, red), axis=2)


def _export_images(batch, indices, split, first_number):
    """Write the images of `batch` selected by `indices` as JPEG files.

    Files go to output_root/<split>/<tag>/<tag>_<first_number + index>.jpg.
    Raises IOError when OpenCV reports that a file could not be written.
    """
    labels = batch[b'labels']
    data = batch[b'data']
    for j in tqdm(indices):
        tag = _class_tag(labels[j])
        class_dir = output_root / split / tag
        class_dir.mkdir(parents=True, exist_ok=True)
        image_path = class_dir / '{}_{}.jpg'.format(tag, first_number + j)
        # cv2.imwrite signals failure through its return value only.
        if not cv2.imwrite(str(image_path), _row_to_bgr(data[j])):
            raise IOError('could not write {}'.format(image_path))


# Running count of images seen in earlier batches. Adding it to the in-batch
# index gives every image a number that is unique across all batches, so
# files of the same class never overwrite each other.
images_before = 0
for train_path in tqdm(train_paths):
    batch = _load_batch(train_path)
    total = len(batch[b'data'])
    split_at = min(single_train_number, total)
    # Indices [0, split_at) -> train, [split_at, total) -> val; together they
    # cover the whole batch with no image skipped.
    _export_images(batch, range(split_at), 'train', images_before)
    _export_images(batch, range(split_at, total), 'val', images_before)
    images_before += total

test_batch = _load_batch(test_path)
_export_images(test_batch, range(len(test_batch[b'labels'])), 'test', 0)