目录
二进制文件(数据)CIFAR-10数据集
由10个类的60000个32*32彩色图像组成,每个类有6000个图像,其中有50000个训练图像和10000个测试图像
数据集分为5个训练批次和1个测试批次,每个批次包含10000个图像
二进制版本数据文件:data_batch_1.bin,data_batch_2.bin,...,data_batch_5.bin以及test_batch.bin
每3073个字节是一个样本:1个字节的目标值(图像标签)+3072个字节的像素值(前1024个字节是红色通道,接下来的1024个字节是绿色通道,最后1024个字节是蓝色通道)
CIFAR-10二进制数据读取流程分析
- 构造文件队列
- 读取文件队列(读取与解码) 处理图片数据形状以及数据类型
- 批处理返回
- 开启会话线程运行
读取与解码分析
reader = tf.FixedLengthRecordReader(3073)
key,value = reader.read(file_queue)
decoded = tf.decode_raw(value,tf.uint8)
***对tensor对象进行切片
label
***一个样本image(3072字节 = 1024r +1024g +1024b)
shape = (3,32,32) = (channels,height,width) -->转置成TensorFlow图像表示习惯 (height,width,channels) = (32,32,3)
案例:二进制文件
代码:
import tensorflow as tf
import os
# The code below builds a graph with tf.compat.v1 queue ops and runs it inside
# a tf.compat.v1.Session, so eager execution is switched off up front.
tf.compat.v1.disable_eager_execution()
class Cifar(object):
    """Read CIFAR-10 binary batch files through a TF1 queue-based pipeline.

    Every record in the files is ``all_bytes`` long: ``label_bytes`` of label
    followed by ``image_bytes`` of pixel data laid out channel by channel.
    """

    def __init__(self):
        """Set the image geometry and the derived per-record byte counts."""
        # Geometry of a single image.
        self.height = 32
        self.width = 32
        self.channels = 3
        # Byte counts of one fixed-length record.
        self.image_bytes = self.height * self.width * self.channels
        self.label_bytes = 1
        self.all_bytes = self.label_bytes + self.image_bytes

    def read_and_decode(self, file_list):
        """Build the read/decode/batch graph, run it once and print each stage.

        Args:
            file_list: Paths of the CIFAR-10 ``.bin`` files to read from.

        Returns:
            None. Graph tensors and their evaluated values are printed.
        """
        # Queue of file names feeding the reader.
        file_queue = tf.compat.v1.train.string_input_producer(file_list)

        # Each read yields one fixed-length record:
        # key is the record's origin, value is the raw bytes of one sample.
        reader = tf.compat.v1.FixedLengthRecordReader(self.all_bytes)
        key, value = reader.read(file_queue)
        print("key:\n", key)
        print("value_\n", value)

        # Raw byte string -> 1-D uint8 tensor.
        decoded = tf.compat.v1.decode_raw(value, tf.uint8)
        print("decoded:\n", decoded)

        # Split the record into the label byte(s) and the pixel bytes.
        label = tf.slice(decoded, [0], [self.label_bytes])
        image = tf.slice(decoded, [self.label_bytes], [self.image_bytes])
        print("lable:\n", label)
        print("image:\n", image)

        # Pixel bytes are stored as (channels, height, width).
        image_reshape = tf.compat.v1.reshape(
            image, shape=[self.channels, self.height, self.width]
        )
        print("image_reshape:\n", image_reshape)

        # Move channels last: (height, width, channels).
        image_transposed = tf.transpose(image_reshape, [1, 2, 0])
        print("image_transposed:\n", image_transposed)

        # Convert the pixels to float32 before batching.
        image_cast = tf.cast(image_transposed, tf.float32)

        # Group single samples into batches of 100.
        label_batch, image_batch = tf.compat.v1.train.batch(
            [label, image_cast], batch_size=100, num_threads=1, capacity=100
        )
        print("lable_batch:\n", label_batch)
        print("image_batch", image_batch)

        with tf.compat.v1.Session() as sess:
            # Start the threads that fill the queues.
            coord = tf.compat.v1.train.Coordinator()
            threads = tf.compat.v1.train.start_queue_runners(
                sess=sess, coord=coord
            )
            try:
                # One run evaluates every intermediate stage of one record.
                (
                    key_new,
                    value_new,
                    decoded_new,
                    label_new,
                    image_new,
                    image_reshape_new,
                    image_transposed_new,
                ) = sess.run(
                    [
                        key,
                        value,
                        decoded,
                        label,
                        image,
                        image_reshape,
                        image_transposed,
                    ]
                )
                # A second run dequeues one full batch.
                label_value, image_value = sess.run([label_batch, image_batch])

                print("key_new:\n", key_new)
                print("value_new:\n", value_new)
                print("decoded_new:\n", decoded_new)
                print("lable_new:\n", label_new)
                print("image_new:\n", image_new)
                print("image_reshape_new:\n", image_reshape_new)
                print(" image_transposed_new:\n", image_transposed_new)
                print("lable_value:\n", label_value)
                print("image_value:\n", image_value)
            finally:
                # Stop and join the queue threads even when a run fails, so
                # they are not left running while the session is closed.
                coord.request_stop()
                coord.join(threads)
if __name__ == '__main__':
    # Folder that holds the extracted CIFAR-10 binary batches.
    data_dir = "D:/heima/Python深度之神经网络资料/02-代码/cifar-10-batches-bin"
    entries = os.listdir(data_dir)
    print("file_name:\n:", entries)

    # Keep the entries whose last three characters are "bin" and turn them
    # into full paths.
    bin_paths = []
    for entry in entries:
        if entry[-3:] == "bin":
            bin_paths.append(os.path.join(data_dir, entry))
    print("file_list:\n", bin_paths)

    # Build the pipeline and print every stage.
    Cifar().read_and_decode(bin_paths)
结果解析:
1)原始数据读取
2)解码
3)目标值与特征值切片
4)图片转为TensorFlow习惯形式
5)批处理
6)显示处理后数据详情