问题
问题是这样的:要把一个每行长度不同的数组存到tfrecord中,然后再读取出来。
# Ragged example data: five rows of differing lengths.
# dtype=object is required because the rows are not all the same length;
# without it NumPy >= 1.24 raises ValueError (older versions only warned).
# The result is a 1-D object array whose elements are plain Python lists.
a = np.array([[0, 54, 91, 153, 177, 1],
              [0, 50, 89, 147, 196],
              [0, 38, 79, 157],
              [0, 49, 89, 147, 177],
              [0, 32, 73, 145]], dtype=object)
图片我都存储了,这个不还是小意思,一顿操作
import tensorflow as tf
import numpy as np
def _int64_feature(value):
    """Build a ``tf.train.Feature`` holding int64 values.

    Args:
        value: either a ``list`` of integers, used as-is, or any other
            object, which is wrapped into a one-element list.

    Returns:
        A ``tf.train.Feature`` whose ``int64_list`` contains the values.
    """
    # Only an actual ``list`` is treated as a sequence; everything else
    # (including a bare int) becomes a single-element list.
    values = value if isinstance(value, list) else [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=values))
# Write an array to TFRecord.
# a is an array which contains lists of varying length.
# dtype=object is required because the rows are not all the same length;
# without it NumPy >= 1.24 raises ValueError (older versions only warned).
# The result is a 1-D object array whose elements are plain Python lists.
a = np.array([[0, 54, 91, 153, 177, 1],
              [0, 50, 89, 147, 196],
              [0, 38, 79, 157],
              [0, 49, 89, 147, 177],
              [0, 32, 73, 145]], dtype=object)
# Write one tf.train.Example per row of `a` into the TFRecord file 'file'.
# The `with` block guarantees the writer is flushed and closed even if
# building or serializing an example raises part-way through.
with tf.python_io.TFRecordWriter('file') as writer:
    for i, row in enumerate(a):
        # 'i' stores the row index, 'data' stores the row's integers.
        feature = {'i': _int64_feature(i),
                   'data': _int64_feature(row)}
        # Create an example protocol buffer
        example = tf.train.Example(features=tf.train.Features(feature=feature))
        # Serialize to string and write on the file
        writer.write(example.SerializeToString())
# Use the Dataset API to read the TFRecord file back.
# `filenames` lists the TFRecord files to read; here only 'file', the
# path the writer above wrote to.
filenames = ["file"]
dataset = tf.data.TFRecordDataset(filenames)
def _parse_function(example_proto):
    """Parse one serialized Example into its ``i`` and ``data`` tensors.

    Args:
        example_proto: a serialized ``tf.train.Example`` record.

    Returns:
        A tuple ``(i, data)`` of the parsed int64 features.

    NOTE: both features are declared as scalar (shape ``[]``) int64
    values. A record whose ``data`` holds more than one value therefore
    fails to parse with "Number of int64 values != expected".
    """
    schema = {
        'i': tf.FixedLenFeature([], tf.int64),
        'data': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(example_proto, schema)
    return parsed['i'], parsed['data']
# Build the input pipeline: parse every record, shuffle with a buffer of
# one element, repeat the data and emit batches of size one.
dataset = (dataset.map(_parse_function)
           .shuffle(buffer_size=1)
           .repeat()
           .batch(1))
iterator = dataset.make_one_shot_iterator()
i, data = iterator.get_next()
# Fetch and print three consecutive batches.
with tf.Session() as sess:
    for _ in range(3):
        print(sess.run([i, data]))
报了奇怪的错误:Name: , Key: data, Index: 0. Number of int64 values != expected. Values size: 6 but output shape: []。这意思是我这条数据里有6个值,但是解析时声明的输出形状是[](也就是一个标量),这到底是哪里错了?我先把读取的代码注释掉,看看tfrecord有没有写成功,发现写成功了,这就表明是读取的问题。我怀疑是因为每次写入的长度是变化的,但是又觉得不是,因为图片的尺寸都是不同的,我还是可以读取的。百思不得其解的时候,我发现存储图片的时候用的是img.tobytes():我把一个数组转换成了bytes,而且也是用bytes存储的。是不是tensorflow会把这个bytes当成一个元素?虽然每个图片的size不同,但是tobytes后tensorflow都会当成一个元素,然后读取的时候再根据(height,width,channel)来解析成图片。
我来试试不存为int64,而是存为bytes。 又是一顿厉害的操作
数据转为bytes
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
def _byte_feature(value):
    """Build a ``tf.train.Feature`` holding a single bytes value.

    Args:
        value: the bytes object to store; it is wrapped into a
            one-element ``BytesList``.

    Returns:
        A ``tf.train.Feature`` whose ``bytes_list`` contains ``value``.
    """
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)
def _int64_feature(value):
    """Build a ``tf.train.Feature`` holding int64 values.

    Args:
        value: either a ``list`` of integers, used as-is, or any other
            object, which is wrapped into a one-element list.

    Returns:
        A ``tf.train.Feature`` whose ``int64_list`` contains the values.
    """
    # Only an actual ``list`` is treated as a sequence; everything else
    # (including a bare int) becomes a single-element list.
    values = value if isinstance(value, list) else [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=values))
# Write an array to TFRecord.
# a is an array which contains lists of varying length.
# dtype=object is required because the rows are not all the same length;
# without it NumPy >= 1.24 raises ValueError (older versions only warned).
# The result is a 1-D object array whose elements are plain Python lists.
a = np.array([[0, 54, 91, 153, 177, 1],
              [0, 50, 89, 147, 196],
              [0, 38, 79, 157],
              [0, 49, 89, 147, 177],
              [0, 32, 73, 145]], dtype=object)
# Write one tf.train.Example per row of `a` into the TFRecord file 'file'.
# The `with` block guarantees the writer is flushed and closed even if
# building or serializing an example raises part-way through.
with tf.python_io.TFRecordWriter('file') as writer:
    for row in a:
        feature = {
            # Store the row length instead of the (meaningless) row index;
            # it is kept so the data can be restored when reading back.
            'len': _int64_feature(len(row)),
            # Each row is a plain Python list, so it is converted to a
            # numpy array first in order to call tobytes() on it.
            'data': _byte_feature(np.array(row).tobytes()),
        }
        # Create an example protocol buffer
        example = tf.train.Example(features=tf.train.Features(feature=feature))
        # Serialize to string and write on the file
        writer.write(example.SerializeToString())
#
# Use the Dataset API to read the TFRecord file back.
# `filenames` lists the TFRecord files to read; here only 'file', the
# path the writer above wrote to.
filenames = ["file"]
dataset = tf.data.TFRecordDataset(filenames)
def _parse_function(example_p