二进制数据:
1、CIFAR10二进制数据集介绍:
每3073个字节是一个样本
每个样本 = 1个字节的目标值 + 3072个字节的像素值(依次为1024字节红色通道、1024字节绿色通道、1024字节蓝色通道)
2、CIFAR10 二进制数据读取
流程:
1)构造文件名队列
2)读取与解码
#读取
reader=tf.FixedLengthRecordReader(3073)
key,value=reader.read(file_queue)
#解码
decoded=tf.decode_raw(value,tf.uint8)
*对TensorFlow对象进行切片
*一个样本image(3072字节=1024r+1024g+1024b)
*将shape转换为TensorFlow的图像表示习惯:先reshape为[channels,height,width],再转置为[height,width,channels]
图片形状、类型调整完毕
补充:ndarray.T 表示转置,即行变列、列变行,例如形状由(3,4)变为(4,3)
3)批处理
二进制文件读取案例:
import tensorflow as tf
import os
class Cifar(object):
    """Read CIFAR-10 binary files with the TensorFlow 1.x queue-based pipeline.

    Each record in the binary files is ``all_bytes`` long: ``label_bytes``
    of label followed by ``image_bytes`` of pixel data stored channel by
    channel (all red values, then all green, then all blue).
    """

    def __init__(self):
        """Set the image geometry and the derived per-record byte counts."""
        # Image geometry.
        self.height = 32
        self.width = 32
        self.channels = 3

        # Byte counts of one record.
        self.image_bytes = self.height * self.width * self.channels
        # The original code also bound this value to ``self.image`` through a
        # chained assignment; it is kept so existing readers of the attribute
        # keep working. NOTE(review): looks accidental - confirm and drop.
        self.image = self.image_bytes
        self.label_bytes = 1
        self.all_bytes = self.image_bytes + self.label_bytes

    def read_and_decode(self, file_list):
        """Build the input pipeline, run it once and print every stage.

        Args:
            file_list: paths of the CIFAR-10 ``.bin`` files to read.

        Returns:
            None. The graph tensors and their evaluated values are printed.

        Any exception raised while evaluating the graph propagates to the
        caller after the queue-runner threads have been asked to stop.
        """
        # 1. Build the filename queue.
        file_queue = tf.train.string_input_producer(file_list)

        # 2. Read and decode.
        # Reading stage: every read yields one fixed-length record.
        reader = tf.FixedLengthRecordReader(self.all_bytes)
        # key is the file name, value is one sample.
        key, value = reader.read(file_queue)
        print("key:\n", key)
        print("value:\n", value)

        # Decoding stage: raw bytes -> uint8 vector.
        decoded = tf.decode_raw(value, tf.uint8)
        print("decoded:\n", decoded)

        # Split the target (label) from the features (pixels).
        label = tf.slice(decoded, [0], [self.label_bytes])
        image = tf.slice(decoded, [self.label_bytes], [self.image_bytes])
        print("label:\n", label)
        print("image:\n", image)

        # The pixels are stored channel by channel, so reshape to
        # [channels, height, width] first.
        image_reshaped = tf.reshape(
            image, shape=[self.channels, self.height, self.width]
        )
        print("image_reshaped:\n", image_reshaped)

        # Transpose to the [height, width, channels] order.
        image_transposed = tf.transpose(image_reshaped, [1, 2, 0])
        # Fixed label: the original printed this tensor as "image_reshaped".
        print("image_transposed:\n", image_transposed)

        # Convert the image dtype from uint8 to float32.
        image_cast = tf.cast(image_transposed, tf.float32)

        # 3. Batching.
        label_batch, image_batch = tf.train.batch(
            [label, image_cast], batch_size=100, num_threads=1, capacity=100
        )
        print("label_batch:\n", label_batch)
        print("image_batch:\n", image_batch)

        # Open a session and evaluate the pipeline.
        with tf.Session() as sess:
            # Start the queue-runner threads.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                (
                    key_new,
                    value_new,
                    decoded_new,
                    label_new,
                    image_new,
                    image_reshaped_new,
                    image_transposed_new,
                    label_value,
                    image_value,
                ) = sess.run(
                    [
                        key,
                        value,
                        decoded,
                        label,
                        image,
                        image_reshaped,
                        image_transposed,
                        label_batch,
                        image_batch,
                    ]
                )
                print('key_new:\n', key_new)
                print("value_new:\n", value_new)
                print("decoded_new:\n", decoded_new)
                print("label_new:\n", label_new)
                print("image_new:\n", image_new)
                print("image_reshaped_new:\n", image_reshaped_new)
                print("image_transposed_new:\n", image_transposed_new)
                print("label_value:\n", label_value)
                print("image_value:\n", image_value)
            finally:
                # Stop and reclaim the threads even when evaluation fails, so
                # they are not left running while the session is closed.
                coord.request_stop()
                coord.join(threads)
        return None
if __name__ == "__main__":
    # List every entry of the dataset directory.
    file_name = os.listdir("./cifar-10-batches-bin")
    print("file_name:\n", file_name)

    # Build the path list, keeping only the entries whose name ends in "bin".
    file_list = [
        os.path.join("./cifar-10-batches-bin/", entry)
        for entry in file_name
        if entry.endswith("bin")
    ]
    print("file_list:\n", file_list)

    # Instantiate Cifar and run the reading pipeline on those files.
    cifar = Cifar()
    cifar.read_and_decode(file_list)
程序中用到的二进制数据文件,见上一篇博客链接中的 cifar-10-batches-bin 文件夹。