repeat、shuffle和batch的用法:
import tensorflow as tf
import numpy as np
# Demonstrates how the tf.data transformations batch, shuffle and repeat
# behave, alone and combined, on a tiny dataset holding the integers 0..9.
test_int = np.arange(10)
print("test_int: ", test_int)

# Wrap the array in a Dataset object (one scalar element per array entry);
# the underlying values are unchanged.
int_dataset = tf.data.Dataset.from_tensor_slices(test_int)
print("int_dataset: ", int_dataset)

# batch
# drop_remainder=True discards the trailing short batch ([9]), so every
# batch has exactly 3 elements and the static shape is (3,).
sequences1 = int_dataset.batch(3, drop_remainder=True)
print("sequences1: ", sequences1)
# take(-1) yields every element of the dataset.
for item in sequences1.take(-1):
    print("sequences1_item: ", item)

# Without drop_remainder the last batch may be shorter, so the static
# shape is (None,) and the final batch is [9].
sequences2 = int_dataset.batch(3)
print("sequences2: ", sequences2)
for item in sequences2.take(-1):
    print("sequences2_item: ", item)

# batch + shuffle
# shuffle before batch: individual elements are shuffled (buffer of 3)
# and then grouped, so each batch mixes elements from nearby positions.
sequences3 = int_dataset.shuffle(3).batch(3, drop_remainder=True)
print("sequences3: ", sequences3)
for item in sequences3.take(-1):
    print("sequences3_item: ", item)

# batch before shuffle: whole batches are shuffled; the contents of each
# batch stay in their original order.
sequences4 = int_dataset.batch(3).shuffle(3)
# NOTE(review): the label below says "sequences3" although it prints
# sequences4 (copy-paste slip); kept unchanged so that the recorded output
# following this script still matches what the script prints.
print("sequences3: ", sequences4)
for item in sequences4.take(-1):
    print("sequences4_item: ", item)

# repeat
# repeat(2) iterates over the whole dataset twice in a row.
sequences5 = int_dataset.repeat(2)
print("sequences5: ", sequences5)
for item in sequences5.take(-1):
    print("sequences5_item: ", item)

# repeat + batch
# batch before repeat: the same sequence of batches is produced twice, so
# no batch spans the boundary between two passes over the data.
sequences6 = int_dataset.batch(2).repeat(2)
print("sequences6: ", sequences6)
for item in sequences6.take(-1):
    print("sequences6_item: ", item)
输出:
test_int: [0 1 2 3 4 5 6 7 8 9]
int_dataset: <TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>
sequences1: <BatchDataset element_spec=TensorSpec(shape=(3,), dtype=tf.int32, name=None)>
sequences1_item: tf.Tensor([0 1 2], shape=(3,), dtype=int32)
sequences1_item: tf.Tensor([3 4 5], shape=(3,), dtype=int32)
sequences1_item: tf.Tensor([6 7 8], shape=(3,), dtype=int32)
sequences2: <BatchDataset element_spec=TensorSpec(shape=(None,), dtype=tf.int32, name=None)>
sequences2_item: tf.Tensor([0 1 2], shape=(3,), dtype=int32)
sequences2_item: tf.Tensor([3 4 5], shape=(3,), dtype=int32)
sequences2_item: tf.Tensor([6 7 8], shape=(3,), dtype=int32)
sequences2_item: tf.Tensor([9], shape=(1,), dtype=int32)
sequences3: <BatchDataset element_spec=TensorSpec(shape=(3,), dtype=tf.int32, name=None)>
2023-05-07 23:29:54.726950: W tensorflow/core/data/root_dataset.cc:247] Optimization loop failed: CANCELLED: Operation was cancelled
sequences3_item: tf.Tensor([0 1 3], shape=(3,), dtype=int32)
sequences3_item: tf.Tensor([5 6 4], shape=(3,), dtype=int32)
sequences3_item: tf.Tensor([2 8 7], shape=(3,), dtype=int32)
sequences3: <ShuffleDataset element_spec=TensorSpec(shape=(None,), dtype=tf.int32, name=None)>
sequences4_item: tf.Tensor([3 4 5], shape=(3,), dtype=int32)
sequences4_item: tf.Tensor([9], shape=(1,), dtype=int32)
sequences4_item: tf.Tensor([0 1 2], shape=(3,), dtype=int32)
sequences4_item: tf.Tensor([6 7 8], shape=(3,), dtype=int32)
sequences5: <RepeatDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>
sequences5_item: tf.Tensor(0, shape=(), dtype=int32)
sequences5_item: tf.Tensor(1, shape=(), dtype=int32)
sequences5_item: tf.Tensor(2, shape=(), dtype=int32)
sequences5_item: tf.Tensor(3, shape=(), dtype=int32)
sequences5_item: tf.Tensor(4, shape=(), dtype=int32)
sequences5_item: tf.Tensor(5, shape=(), dtype=int32)
sequences5_item: tf.Tensor(6, shape=(), dtype=int32)
sequences5_item: tf.Tensor(7, shape=(), dtype=int32)
sequences5_item: tf.Tensor(8, shape=(), dtype=int32)
sequences5_item: tf.Tensor(9, shape=(), dtype=int32)
sequences5_item: tf.Tensor(0, shape=(), dtype=int32)
sequences5_item: tf.Tensor(1, shape=(), dtype=int32)
sequences5_item: tf.Tensor(2, shape=(), dtype=int32)
sequences5_item: tf.Tensor(3, shape=(), dtype=int32)
sequences5_item: tf.Tensor(4, shape=(), dtype=int32)
sequences5_item: tf.Tensor(5, shape=(), dtype=int32)
sequences5_item: tf.Tensor(6, shape=(), dtype=int32)
sequences5_item: tf.Tensor(7, shape=(), dtype=int32)
sequences5_item: tf.Tensor(8, shape=(), dtype=int32)
sequences5_item: tf.Tensor(9, shape=(), dtype=int32)
sequences6: <RepeatDataset element_spec=TensorSpec(shape=(None,), dtype=tf.int32, name=None)>
sequences6_item: tf.Tensor([0 1], shape=(2,), dtype=int32)
sequences6_item: tf.Tensor([2 3], shape=(2,), dtype=int32)
sequences6_item: tf.Tensor([4 5], shape=(2,), dtype=int32)
sequences6_item: tf.Tensor([6 7], shape=(2,), dtype=int32)
sequences6_item: tf.Tensor([8 9], shape=(2,), dtype=int32)
sequences6_item: tf.Tensor([0 1], shape=(2,), dtype=int32)
sequences6_item: tf.Tensor([2 3], shape=(2,), dtype=int32)
sequences6_item: tf.Tensor([4 5], shape=(2,), dtype=int32)
sequences6_item: tf.Tensor([6 7], shape=(2,), dtype=int32)
sequences6_item: tf.Tensor([8 9], shape=(2,), dtype=int32)
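shuffle的随机方式比较有意思:它并不是对整个数据集做全局打乱,而是维护一个大小为buffer_size的缓冲区,每次从缓冲区中随机取出一个元素,再用后面的元素补位。因此当buffer_size小于数据集大小时,元素只会在原位置附近移动(例如上面shuffle(3)的输出中,第一个batch只可能来自最前面的几个元素);只有buffer_size不小于数据集大小时才是完全均匀的打乱。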
参考下面这个链接理解:
https://www.youtube.com/watch?v=c7G5W4Wv72Q