Tfrecord文件是tensorflow专门设计的一种训练样本储存格式,将训练样本打包成tfrecord格式后能够加快文件的读取效率。所以训练网络的第一步就是将自己的训练集样本打包生成tfrecord格式。本文主要介绍两种tfrecord打包方式,这两种方式的主要区别在于生成的tfrecord文件大小不同。
方式一:利用常用图像处理库读取图像并解码,转换成二进制文件进行存储,网络上找到的基本上都是这种方式。
写入tfrecord文件
def data_to_tfrecord(images, labels, filename):
    """Pack a dataset into a TFRecord file (method 1: store decoded pixels).

    Each image is read and decoded with PIL, and its raw (uncompressed)
    pixel bytes are written into the record, together with the label and
    the original width/height (needed to reshape the flat byte string
    back into an image when reading).

    Args:
        images: list of image file paths.
        labels: labels corresponding one-to-one with ``images``.
        filename: output TFRecord file name.
    """
    if os.path.isfile(filename):  # never overwrite an existing file
        print("%s exists" % filename)
        return
    print("Converting data into %s ..." % filename)
    writer = tf.python_io.TFRecordWriter(filename)
    try:
        for index, img_file in zip(labels, images):
            img1 = Image.open(img_file)  # read + decode the image via PIL
            width, height = img1.size    # original dimensions
            img_raw = img1.tobytes()     # raw decoded pixel bytes (uncompressed)
            label = int(index)
            example = tf.train.Example(
                features=tf.train.Features(
                    feature={
                        'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
                        'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])),
                        'img_width': tf.train.Feature(int64_list=tf.train.Int64List(value=[width])),
                        'img_height': tf.train.Feature(int64_list=tf.train.Int64List(value=[height]))
                    }
                )
            )
            writer.write(example.SerializeToString())
    finally:
        # Close even if an image fails to load mid-loop; the original code
        # leaked the writer (and a partially written file) on any exception.
        writer.close()
读取tfrecord文件
import numpy as np
import tensorflow as tf
import tensorlayer as tl
def read_and_decode(filename):
    """Return (image, label) tensors that read one example from a TFRecord file.

    The stored ``img_raw`` bytes are the raw decoded pixels written by
    ``data_to_tfrecord`` (method 1), so they are turned back into an image
    with ``tf.decode_raw`` + ``tf.reshape`` — the record itself carries no
    shape information, only a flat byte string plus width/height features.
    """
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    parsed = tf.parse_single_example(
        serialized_example,
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'img_raw': tf.FixedLenFeature([], tf.string),
            'img_width': tf.FixedLenFeature([], tf.int64),
            'img_height': tf.FixedLenFeature([], tf.int64),
        },
    )
    # You can do more image distortion here for training data.
    label = tf.cast(parsed['label'], tf.int32)
    width = tf.cast(parsed['img_width'], tf.int32)
    height = tf.cast(parsed['img_height'], tf.int32)
    img = tf.decode_raw(parsed['img_raw'], tf.uint8)  # bytes -> uint8 vector
    img = tf.reshape(img, [height, width, 3])         # restore the image shape
    img = tf.image.resize_images(img, [32, 32])       # unify all images to one size
    # img = tf.cast(img, tf.float32) #* (1. / 255) - 0.5
    return img, label
# Example to visualize data
img, label = read_and_decode("train.tfrecord")
img_batch, label_batch = tf.train.shuffle_batch([img, label],
                                                batch_size=4,
                                                capacity=5000,
                                                min_after_dequeue=100,
                                                num_threads=1)
# Use the public .shape property; the original accessed the private
# Tensor._shape attribute, which is not a stable API.
print("img_batch : %s" % img_batch.shape)
print("label_batch : %s" % label_batch.shape)
# tf.initialize_all_variables() is deprecated in TF 1.x; this is its
# direct replacement with identical behavior.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # Queue-based input pipelines need explicit runner threads.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for i in range(3):  # number of mini-batches (steps)
        print("Step %d" % i)
        val, l = sess.run([img_batch, label_batch])
        # exit()
        print(val.shape, l)
        tl.visualize.images2d(val, second=1, saveable=False, name='batch' + str(i), dtype=np.uint8, fig_idx=2020121)
    coord.request_stop()
    coord.join(threads)
    # The redundant sess.close() was removed: the `with` block already
    # closes the session on exit.
方式二:利用tf.gfile.FastGFile读取图像信息(貌似并没有解码),转换成二进制文件存储。
这个方法是我在看tensorflow在github的slim框架中的生成tfrecord文件所使用的方法。
写入tfrecord文件
def data_to_tfrecord(images, labels, filename):
    """Pack a dataset into a TFRecord file (method 2: store encoded file bytes).

    Unlike method 1, the original (already compressed, e.g. JPEG) file
    bytes are stored without decoding, so the TFRecord stays roughly the
    size of the input image set. The reader must decode the bytes (e.g.
    with ``tf.image.decode_jpeg``) when consuming the records.

    Args:
        images: list of image file paths.
        labels: labels corresponding one-to-one with ``images``.
        filename: output TFRecord file name.
    """
    import io  # local import: only needed to parse dimensions from raw bytes
    if os.path.isfile(filename):  # never overwrite an existing file
        print("%s exists" % filename)
        return
    print("Converting data into %s ..." % filename)
    writer = tf.python_io.TFRecordWriter(filename)
    try:
        for index, img_file in zip(labels, images):
            # Read the file ONCE and reuse the bytes both as the record
            # payload and (via BytesIO) for PIL to extract the dimensions.
            # The original code read every file twice: Image.open(img_file)
            # and then FastGFile(img_file).read().
            img_raw = tf.gfile.FastGFile(img_file, 'rb').read()
            width, height = Image.open(io.BytesIO(img_raw)).size
            label = int(index)
            example = tf.train.Example(
                features=tf.train.Features(
                    feature={
                        'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
                        'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])),
                        'img_width': tf.train.Feature(int64_list=tf.train.Int64List(value=[width])),
                        'img_height': tf.train.Feature(int64_list=tf.train.Int64List(value=[height]))
                    }
                )
            )
            writer.write(example.SerializeToString())
    finally:
        # Close even if an image fails to load mid-loop; the original code
        # leaked the writer on any exception.
        writer.close()
读取tfrecord文件
import numpy as np
import tensorflow as tf
import tensorlayer as tl
def read_and_decode(filename):
    """Return (image, label) tensors reading one example from a TFRecord file.

    Counterpart of the method-2 writer: ``img_raw`` holds the original
    encoded (JPEG) file bytes, so the image must be decoded with
    ``tf.image.decode_jpeg`` instead of ``tf.decode_raw``.
    """
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example, features={
            'label': tf.FixedLenFeature([], tf.int64),
            'img_raw': tf.FixedLenFeature([], tf.string),
            'img_width': tf.FixedLenFeature([], tf.int64),
            'img_height': tf.FixedLenFeature([], tf.int64)
        }
    )
    # You can do more image distortion here for training data.
    width = tf.cast(features['img_width'], tf.int32)
    height = tf.cast(features['img_height'], tf.int32)
    # channels=3 forces a 3-channel output; without it a grayscale JPEG
    # decodes to 1 channel and the reshape to [height, width, 3] below
    # fails at runtime.
    img = tf.image.decode_jpeg(features['img_raw'], channels=3)
    img = tf.reshape(img, [height, width, 3])
    img = tf.image.resize_images(img, [32, 32])  # unify all images to one size
    # img = tf.cast(img, tf.float32) #* (1. / 255) - 0.5
    label = tf.cast(features['label'], tf.int32)
    return img, label
# Example to visualize data
# NOTE(review): the filename here is "train" while the first example used
# "train.tfrecord" — confirm which name the writer actually produced.
img, label = read_and_decode("train")
img_batch, label_batch = tf.train.shuffle_batch([img, label],
                                                batch_size=4,
                                                capacity=5000,
                                                min_after_dequeue=100,
                                                num_threads=1)
# Use the public .shape property; the original accessed the private
# Tensor._shape attribute, which is not a stable API.
print("img_batch : %s" % img_batch.shape)
print("label_batch : %s" % label_batch.shape)
# tf.initialize_all_variables() is deprecated in TF 1.x; this is its
# direct replacement with identical behavior.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # Queue-based input pipelines need explicit runner threads.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for i in range(3):  # number of mini-batches (steps)
        print("Step %d" % i)
        val, l = sess.run([img_batch, label_batch])
        # exit()
        print(val.shape, l)
        tl.visualize.images2d(val, second=1, saveable=False, name='batch' + str(i), dtype=np.uint8, fig_idx=2020121)
    coord.request_stop()
    coord.join(threads)
    # The redundant sess.close() was removed: the `with` block already
    # closes the session on exit.
两种方式的区别
两种方式虽然在代码上只有那么一两行的区别,但对于生成的tfrecord文件还是有很大的区别的。我用的同样的图像样本集,约200M左右,用方式一生成的tfrecord文件约900M,用方式二生成的tfrecord文件约200M。很明显在占用存储空间方面有着很大的区别。据我个人猜测,方式一将图像解码后再转成二进制文件存储,方式二并没有解码而是直接将原始文件内容转成二进制存储,所以在读取时需要进行图像解码。这仅是我个人猜测,如果有懂的大神,还望赐教。