tf.data 构建 TensorFlow 输入管道
# Core dependencies for the tf.data input-pipeline demos below.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Print library versions so notebook results are reproducible.
print(tf.__version__)
print(np.__version__)
# from_tensor_slices splits along axis 0: yields four scalar elements.
ds = tf.data.Dataset.from_tensor_slices([1, 2, 3, 4])
for elem in ds:
    print(elem)

# from_tensors keeps the whole value as ONE element: a single 1-D tensor.
ds = tf.data.Dataset.from_tensors([1, 2, 3, 4])
for elem in ds:
    print(elem)

# A 2x2 matrix wrapped whole as a single dataset element.
t = tf.constant([[1, 2], [3, 4]])
ds = tf.data.Dataset.from_tensors(t)  # one element: [[1, 2], [3, 4]]
for elem in ds:
    print(elem)

# Dict-structured input, kept whole: the entire dict is one element.
t = tf.constant([[1, 2], [3, 4]])
ds = tf.data.Dataset.from_tensors({"a": t, "b": t})
print(next(iter(ds)))  # first (and only) element

# Dict input sliced on axis 0: each element pairs one row of "a"
# with the corresponding row of "b".
t = tf.constant([[1, 2], [3, 4]])
ds = tf.data.Dataset.from_tensor_slices({"a": t, "b": t})
print(next(iter(ds)))  # first element only
# from_tensors: a single element carrying the full (10, 5, 2) array.
dataset1 = tf.data.Dataset.from_tensors(np.zeros(shape=(10, 5, 2), dtype=np.float32))
print(next(iter(dataset1)).shape)

# from_tensor_slices: ten elements, each of shape (5, 2).
dataset2 = tf.data.Dataset.from_tensor_slices(np.zeros(shape=(10, 5, 2), dtype=np.float32))
print(next(iter(dataset2)).shape)

# Dict input, kept whole: both entries retain shape (10, 5, 2).
dataset3 = tf.data.Dataset.from_tensors(
    {"a": np.zeros(shape=(10, 5, 2), dtype=np.float32),
     "b": np.zeros(shape=(10, 5, 2), dtype=np.float32)})
first = next(iter(dataset3))
print(first['a'].shape, first['b'].shape)

# Dict input, sliced on axis 0: each element maps "a"/"b" to (5, 2) arrays.
dataset4 = tf.data.Dataset.from_tensor_slices(
    {"a": np.zeros(shape=(10, 5, 2), dtype=np.float32),
     "b": np.zeros(shape=(10, 5, 2), dtype=np.float32)})
first = next(iter(dataset4))
print(first['a'].shape, first['b'].shape)
Dataset类
Dataset类读取numpy数据
最基础的建立 tf.data.Dataset 的方法是使用 tf.data.Dataset.from_tensor_slices() ,适用于数据量较小(能够整个装进内存)的情况。
具体而言,如果我们的数据集中的所有元素通过张量的第 0 维,拼接成一个大的张量(例如,前节的 MNIST 数据集的训练集即为一个 [60000, 28, 28, 1] 的张量,表示了 60000 张 28*28 的单通道灰度图像),那么我们提供一个这样的张量或者第 0 维大小相同的多个张量作为输入,即可按张量的第 0 维展开来构建数据集,数据集的元素数量为张量第 0 维的大小。
# Load the MNIST arrays from a local .npz archive.
mnist = np.load("mnist.npz")
x_train, y_train = mnist['x_train'], mnist['y_train']
x_train.shape, y_train.shape  # notebook inspection of the raw shapes
# Append a channel axis so each image becomes (28, 28, 1).
x_train = np.expand_dims(x_train, axis=-1)
# Zip images with labels: each dataset element is one (image, label) pair.
mnist_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# Show the first sample only.
image, label = next(iter(mnist_dataset))
plt.title(label.numpy())
plt.imshow(image.numpy()[:, :, 0])
plt.show()
Pandas数据读取
import pandas as pd

# Load the heart-disease CSV and inspect it.
df = pd.read_csv('heart.csv')
df.head()
df.dtypes
# Encode the string-valued 'thal' column as integer category codes.
df['thal'] = pd.Categorical(df['thal'])
df['thal'] = df.thal.cat.codes
# Pull the label column out, leaving only the feature columns in df.
target = df.pop('target')
# Build (features, label) pairs from the underlying numpy arrays.
dataset = tf.data.Dataset.from_tensor_slices((df.values, target.values))
for feat, targ in dataset.take(5):
    print('Features: {}, Target: {}'.format(feat, targ))
从Python generator构建数据管道
# Augmentation source: rescale pixels to [0, 1], random rotations up to 20 deg.
img_gen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255, rotation_range=20)
flowers = './flower_photos/flower_photos/'

def Gen():
    """Yield (images, labels) batches from the flower directory indefinitely."""
    yield from img_gen.flow_from_directory(flowers)

# Wrap the Python generator as a tf.data.Dataset; shapes are left dynamic.
ds = tf.data.Dataset.from_generator(
    Gen,
    output_types=(tf.float32, tf.float32)
)
# Inspect a single batch.
image, label = next(iter(ds))
print(image.shape, label.shape)
TFRecordDataset类
# Schema for decoding serialized tf.train.Example records:
# raw JPEG bytes plus an integer class label.
feature_description = {
    'image': tf.io.FixedLenFeature([], tf.string),
    'label': tf.io.FixedLenFeature([], tf.int64),
}

def _parse_example(example_string):
    """Decode one serialized tf.train.Example into an (image, label) pair.

    The JPEG bytes are decoded, resized to 256x256 and scaled to [0, 1].
    """
    parsed = tf.io.parse_single_example(example_string, feature_description)
    image = tf.io.decode_jpeg(parsed['image'])
    image = tf.image.resize(image, [256, 256]) / 255.0
    return image, parsed['label']

batch_size = 32
# Read the TFRecord file and run every record through the parser.
train_dataset = tf.data.TFRecordDataset("sub_train.tfrecords")
train_dataset = train_dataset.map(_parse_example)
# Inspect the first parsed sample.
image, label = next(iter(train_dataset))
print(image, label)
TextLineDataset类
# Read both CSV files line by line as raw strings (concatenated in order).
titanic_lines = tf.data.TextLineDataset(['train.csv','eval.csv'])

def data_func(line):
    """Split one CSV line into its comma-separated fields."""
    return tf.strings.split(line, sep=",")

# NOTE(review): skip(1) drops only the very first line overall (train.csv's
# header); eval.csv's header, if present, is NOT skipped — confirm intent.
titanic_data = titanic_lines.skip(1).map(data_func)
print(next(iter(titanic_data)))
二 案例
# Section 2: custom Keras layer example.
import tensorflow as tf
print(tf.__version__)
# NOTE(review): tf.test.is_gpu_available() is deprecated in TF2;
# tf.config.list_physical_devices('GPU') is the modern replacement.
tf.test.is_gpu_available()
import tensorflow as tf
# A minimal custom re-implementation of a fully-connected (Dense) layer.
class MyDense(tf.keras.layers.Layer):
    """Fully-connected layer computing ``inputs @ w + b``."""

    def __init__(self, units=32, **kwargs):
        # units: dimensionality of the output space.
        self.units = units
        super(MyDense, self).__init__(**kwargs)

    # build() creates the trainable variables once the input shape is known.
    def build(self, input_shape):
        self.w = self.add_weight(shape=(input_shape[-1], self.units),
                                 initializer='random_normal',
                                 trainable=True,
                                 name='w')
        self.b = self.add_weight(shape=(self.units,),
                                 initializer='random_normal',
                                 trainable=True,
                                 name='b')
        super(MyDense,self).build(input_shape)  # equivalent to setting self.built = True

    # call() defines the forward pass; __call__ invokes it.
    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b

    # get_config() is needed so models built with the Functional API
    # that contain this layer can be serialized and reloaded.
    def get_config(self):
        config = super(MyDense, self).get_config()
        config.update({'units': self.units})
        return config
from sklearn import datasets

# Load the iris dataset: 150 samples, 4 features, 3 classes (0/1/2).
iris = datasets.load_iris()
data, labels = iris.data, iris.target
# Optional feature scaling, left disabled to match the original run:
# from sklearn.preprocessing import MinMaxScaler
# data = MinMaxScaler().fit_transform(data)
data[:5]
labels  # shape (150,)
# Build the classifier with the Functional API: 4 -> 16 (tanh) -> 3 (softmax).
inputs = tf.keras.Input(shape=(4,))
hidden = MyDense(units=16)(inputs)
hidden = tf.nn.tanh(hidden)
logits = MyDense(units=3)(hidden)  # three output classes: 0, 1, 2
predictions = tf.nn.softmax(logits)
model = tf.keras.Model(inputs=inputs, outputs=predictions)
# Shuffle features and labels together so their pairing is preserved:
# append the labels as a final column, shuffle rows in place, then split.
# Generalized: reshape(-1, 1) and [:, :-1] work for any sample/feature count
# (the original hard-coded 150 rows and 4 feature columns).
import numpy as np
combined = np.concatenate((data, labels.reshape(-1, 1)), axis=-1)
np.random.shuffle(combined)
labels = combined[:, -1]
data = combined[:, :-1]
# Optimizer: Adam; loss: sparse categorical cross-entropy; metric: accuracy.
# BUGFIX: the model's output layer already applies softmax (see `predictions`
# above), so the loss must receive probabilities, not logits. The original
# passed from_logits=True, which wrongly re-interprets probabilities as raw
# logits and mis-scales the loss and gradients.
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
# Train with Keras' built-in loop: 100 epochs, batches of 32,
# reshuffling the data each epoch.
model.fit(data, labels, batch_size=32, epochs=100, shuffle=True)
model.summary()
# Persist the full model (architecture + weights) in HDF5 format.
model.save('keras_model_tf_version.h5')
# Reloading a model containing a custom layer requires a mapping from the
# serialized layer name to the Python class.
_custom_objects = {
    "MyDense": MyDense,
}
new_model = tf.keras.models.load_model("keras_model_tf_version.h5",
                                       custom_objects=_custom_objects)
# Sanity check: compare predicted class ids against the shuffled labels.
y_pred = new_model.predict(data)
np.argmax(y_pred, axis=1)
labels