几种读取方法
逐行处理文本文件(可以同时传入多个文件)
# Build a TextLineDataset over the two listed text files.
dataset = tf.data.TextLineDataset(
    filenames=["file1.txt", "file2.txt"],
)
处理TFRecord
# Build a TFRecordDataset over the two listed TFRecord files.
dataset = tf.data.TFRecordDataset(
    filenames=["file1.tfrecords", "file2.tfrecords"],
)
处理多个(文本)文件
# Dataset of the file names matching the glob pattern.
# The public entry point is the `Dataset` class: tf.data.Dataset.list_files.
dataset = tf.data.Dataset.list_files("/path/*.txt")  # doctest: +SKIP
通过yield读取数据
使用tf.data.Dataset中的from_generator函数,示例代码如下
import loompy as lp
import tensorflow as tf
from sklearn.model_selection import train_test_split
# Module-level settings. Their consumers are not visible in this part of the
# file, so the notes below are assumptions to be confirmed.
model_input_name = ""  # left empty here; presumably the model's input name — TODO confirm
input_size = 10000  # presumably the length of one input example — verify against the model
batch_size = 32  # presumably the number of examples per batch — confirm at the call site
epochs = 10  # presumably the number of passes over the training data — confirm at the call site
# Input functions for train, test and eval sets.
def train_input_fn():
    """Input function for the training set.

    Delegates to ``_input_fn`` with mode ``'TRAIN'`` and returns its result.
    """
    return _input_fn(mode='TRAIN')
def test_input_fn():
    """Input function for the test set.

    Delegates to ``_input_fn`` with mode ``'TEST'`` and returns its result.
    """
    return _input_fn(mode='TEST')
def eval_input_fn():
    """Input function for the evaluation set.

    Delegates to ``_input_fn`` with mode ``'EVAL'`` and returns its result.
    """
    return _input_fn(mode='EVAL')
# General purpose input function
def _input_fn(mode = 'TRAIN'):
"""
Arguments
mode : 'TRAIN', 'TEST', 'EVAL'
"""
# A generator to yield data and labels from the given FILE,
# based on the indices assigned to the "indices" variable.
# If you change the labels, remember to update the from_generator()
# parameters below, to reflect their datatype.
def gen():
with lp.connect(FILE, 'r') as ds