import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist
def make_model(n_classes):
    """Build the convolutional classifier used throughout this file.

    The network stacks two convolution + max-pooling stages, flattens the
    result, and feeds it through a 1024-unit dense layer with dropout
    before the final dense layer.

    Args:
        n_classes: Number of units in the final dense layer. That layer has
            no activation, so the model outputs raw logits.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model that expects inputs of
        shape ``(28, 28, 1)``.
    """
    feature_extractor = [
        tf.keras.layers.Conv2D(
            filters=32,
            kernel_size=(5, 5),
            activation=tf.nn.relu,
            input_shape=(28, 28, 1),
        ),
        tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
        tf.keras.layers.Conv2D(
            filters=64, kernel_size=(3, 3), activation=tf.nn.relu
        ),
        tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
    ]
    classifier_head = [
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(units=1024, activation=tf.nn.relu),
        tf.keras.layers.Dropout(rate=0.5),
        tf.keras.layers.Dense(units=n_classes),
    ]
    return tf.keras.Sequential(feature_extractor + classifier_head)
def load_data():
    """Load Fashion-MNIST and preprocess both splits identically.

    Every image tensor gets a trailing channel axis and is rescaled to the
    [-1, 1] range as float32; every label tensor gets a trailing axis.

    Returns:
        A pair ``((train_x, train_y), (test_x, test_y))`` of tensors.
    """
    (train_x, train_y), (test_x, test_y) = fashion_mnist.load_data()

    def _preprocess(images, labels):
        # Add the channel axis expected by the model's (28, 28, 1) input.
        images = tf.expand_dims(images, -1)
        # convert_image_dtype maps integer images to floats in [0, 1]
        # (assumes the dataset yields integer pixel values - TODO confirm);
        # shifting and doubling then gives the [-1, 1] range.
        images = (tf.image.convert_image_dtype(images, tf.float32) - 0.5) * 2
        labels = tf.expand_dims(labels, -1)
        return images, labels

    # The test split previously was rescaled twice and never received the
    # channel axis; both splits now go through the same preprocessing.
    train_x, train_y = _preprocess(train_x, train_y)
    test_x, test_y = _preprocess(test_x, test_y)
    return (train_x, train_y), (test_x, test_y)
def train():
    """Train the classifier with a custom training loop.

    Builds the model with ``make_model``, loads the data with ``load_data``,
    restores the latest checkpoint from ``./tf_ckpts`` when one exists, and
    then trains for a fixed number of epochs. Every 10 batches it prints the
    current loss/accuracy, saves a checkpoint and writes image and scalar
    summaries to ``./log/train``. At the end of each epoch it measures the
    accuracy over the whole training set.
    """
    # Define the model
    n_classes = 10
    model = make_model(n_classes)

    # Input data (the test split is loaded but not used by this function)
    (train_x, train_y), (test_x, test_y) = load_data()

    # Training parameters
    loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
    step = tf.Variable(1, name="global_step")
    optimizer = tf.optimizers.Adam(1e-3)

    # Checkpoint the step counter, the optimizer state and the model together
    ckpt = tf.train.Checkpoint(step=step, optimizer=optimizer, model=model)
    manager = tf.train.CheckpointManager(ckpt, "./tf_ckpts", max_to_keep=3)
    ckpt.restore(manager.latest_checkpoint)
    if manager.latest_checkpoint:
        print(f"Restored from {manager.latest_checkpoint}")
    else:
        print("Initializing from scratch.")

    accuracy = tf.metrics.Accuracy()
    mean_loss = tf.metrics.Mean(name="loss")

    # Train step function
    @tf.function
    def train_step(inputs, labels):
        """Run one optimization step and return (loss, running accuracy)."""
        with tf.GradientTape() as tape:
            # training=True is required for the Dropout layer to be active;
            # without it the model is called in inference mode.
            logits = model(inputs, training=True)
            loss_value = loss(labels, logits)

        gradients = tape.gradient(loss_value, model.trainable_variables)
        # TODO: apply gradient clipping here
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        step.assign_add(1)

        accuracy.update_state(labels, tf.argmax(logits, -1))
        return loss_value, accuracy.result()

    epochs = 10
    batch_size = 32
    # Samples that do not fill a complete batch are dropped
    nr_batches_train = train_x.shape[0] // batch_size
    print(f"Batch size: {batch_size}")
    print(f"Number of batches per epoch: {nr_batches_train}")

    train_summary_writer = tf.summary.create_file_writer("./log/train")

    with train_summary_writer.as_default():
        for epoch in range(epochs):
            for t in range(nr_batches_train):
                start_from = t * batch_size
                to = (t + 1) * batch_size

                features, labels = train_x[start_from:to], train_y[start_from:to]

                loss_value, accuracy_value = train_step(features, labels)
                mean_loss.update_state(loss_value)

                if t % 10 == 0:
                    print(f"{step.numpy()}: {loss_value} - accuracy: {accuracy_value}")
                    save_path = manager.save()
                    print(f"Checkpoint saved: {save_path}")
                    tf.summary.image(
                        "train_set", features, max_outputs=3, step=step.numpy()
                    )
                    tf.summary.scalar("accuracy", accuracy_value, step=step.numpy())
                    tf.summary.scalar("loss", mean_loss.result(), step=step.numpy())
                    # Start a fresh window for the next 10 batches
                    accuracy.reset_states()
                    mean_loss.reset_states()
            print(f"Epoch {epoch} terminated")

            # Measuring accuracy on the whole training set at the end of epoch.
            # Drop the state accumulated by the training steps since the last
            # logging point so it does not leak into this measurement.
            accuracy.reset_states()
            for t in range(nr_batches_train):
                start_from = t * batch_size
                to = (t + 1) * batch_size
                features, labels = train_x[start_from:to], train_y[start_from:to]
                # Inference mode: Dropout must be disabled while evaluating
                logits = model(features, training=False)
                accuracy.update_state(labels, tf.argmax(logits, -1))
            print(f"Training accuracy: {accuracy.result()}")
            accuracy.reset_states()
# Script entry point: start training only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    train()
tf.GradientTape() 创建一个上下文(“磁带”),用于记录在其中执行的所有运算,以便进行自动微分;这样就不必再像 TensorFlow 1.x 那样先构建计算图再求梯度。
默认情况下,第一次调用 tape.gradient() 之后,磁带就会释放它持有的全部资源,因此只能求一次梯度;若需要在同一个磁带上多次调用 tape.gradient(),可以使用 with tf.GradientTape(persistent=True) as tape。
示例:
# Forward pass and loss are computed inside the tape context so that the
# tape can differentiate through them afterwards.
with tf.GradientTape() as tape:
    logits = model(inputs)
    loss_value = loss(labels, logits)
# Gradient of the loss with respect to each trainable variable of the model
gradients = tape.gradient(loss_value, model.trainable_variables)
保存和恢复模型状态
# Group the step counter, the optimizer and the model into one checkpoint
ckpt = tf.train.Checkpoint(step=step, optimizer=optimizer, model=model)
# The manager writes checkpoints under ./tf_ckpts (max_to_keep=3)
manager = tf.train.CheckpointManager(ckpt, "./tf_ckpts", max_to_keep=3)
# latest_checkpoint is falsy when no checkpoint has been written yet
ckpt.restore(manager.latest_checkpoint)
if manager.latest_checkpoint:
    print(f"Restored from {manager.latest_checkpoint}")
else:
    print("Initializing from scratch.")
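利用 TensorBoard 对数据进行可视化:首先创建日志文件写入器(file writer),再通过上下文管理器把它设为默认写入器,最后调用 tf.summary 的各个函数写入数据。需要对训练过程进行可视化时,在代码目录打开终端运行 tensorboard --logdir=./log(--logdir 要指向写入器所使用的日志目录,本例的写入器写到 ./log/train,因此指向其上级目录 ./log),
然后使用浏览器访问命令行程序所输出的网址(一般是 http://name-of-your-computer:6006)。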
# Create a writer that stores its event files under ./log/train
train_summary_writer = tf.summary.create_file_writer("./log/train")
# Inside this context the tf.summary.* calls below use train_summary_writer
with train_summary_writer.as_default():
    # Training loop elided: features, accuracy_value, mean_loss and step
    # are produced by it.
    ...
    tf.summary.image(
        "train_set", features, max_outputs=3, step=step.numpy()
    )
    tf.summary.scalar("accuracy", accuracy_value, step=step.numpy())
    tf.summary.scalar("loss", mean_loss.result(), step=step.numpy())
下面是利用 Sequential 顺序 API 配合 compile/fit 的简单流程,调用方便,但适用范围不如上面的自定义训练循环广;自定义训练循环可以对梯度进行处理(例如梯度裁剪),也可以按照自己的要求添加功能,相应地也比较麻烦。两者都是基于顺序 API 堆叠各层来建立模型,后面学习的 ResNet 等残差模块则需要用到函数式 API。
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist
# Same architecture as the custom-loop example, trained through the
# built-in compile/fit/evaluate workflow instead.
n_classes = 10
model = tf.keras.Sequential(
    [
        tf.keras.layers.Conv2D(
            32, (5, 5), activation=tf.nn.relu, input_shape=(28, 28, 1)
        ),
        tf.keras.layers.MaxPool2D((2, 2), (2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation=tf.nn.relu),
        tf.keras.layers.MaxPool2D((2, 2), (2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(1024, activation=tf.nn.relu),
        tf.keras.layers.Dropout(0.5),
        # No activation: the model outputs raw logits
        tf.keras.layers.Dense(n_classes),
    ]
)
model.summary()

(train_x, train_y), (test_x, test_y) = fashion_mnist.load_data()
# Scale input in [-1, 1] range
train_x = train_x / 255.0 * 2 - 1
test_x = test_x / 255.0 * 2 - 1
# Add the trailing channel axis expected by the (28, 28, 1) input shape
train_x = tf.expand_dims(train_x, -1).numpy()
test_x = tf.expand_dims(test_x, -1).numpy()

model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    # The string "sparse_categorical_crossentropy" assumes the model emits
    # probabilities; since the last layer emits logits, from_logits=True is
    # required for the loss to be computed correctly.
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)
model.fit(train_x, train_y, epochs=10)
model.evaluate(test_x, test_y)
eager执行模式:
它能立即进行评估计算,不需要建图,tensorflow2.0默认是eager执行模式,使得模型调试更加简单,减少了代码量,当模型深度较深、更复杂时,追求高性能或部署模型时,我们依然希望使用 TensorFlow 1.X 中默认的图执行模式(Graph Execution),将模型转换为高效的 TensorFlow 图模型。此时,TensorFlow 2 为我们提供了 tf.function 模块,结合 AutoGraph 机制,使得我们仅需加入一个简单的 @tf.function 修饰符,就能轻松将模型以图执行模式运行。
上面的图模式用法只涉及常量;如果被 @tf.function 修饰的函数在每次调用时都会创建新的变量(例如在函数体内直接调用 tf.Variable()),就会触发异常。
假如程序含有变量但依然想用图模式,也有相应的解决办法:
第一种是改变函数的定义,通过输入参数传递变量:
import tensorflow as tf
@tf.function
def f(b):
    """Return ``matmul(A, x) + b`` for two fixed 2x2 constants.

    The function is compiled with ``tf.function`` so that it runs in graph
    mode, which is what this example is meant to demonstrate.

    Args:
        b: Value added to the matrix product. It is supplied by the caller
            instead of being created inside the function, so no variable is
            ever created in the function body.

    Returns:
        The result of the ``tf.add`` operation named ``"result"``.
    """
    A = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
    x = tf.constant([[0, 10], [0, 0.5]])
    # b used to be a constant defined here; it is now an input parameter
    y = tf.add(tf.matmul(A, x), b, name="result")
    return y
# Call f with three different kinds of argument for b.
# A tf.Variable created outside f and passed in as the argument
var=tf.Variable(12.)
f(var)
# A plain Python integer
f(15)
# A tensor built from an integer literal.
# NOTE(review): this tensor is presumably int32 while f adds it to a float32
# product - confirm that this call does not raise a dtype mismatch error.
f(tf.constant(1))
现在 f 既可以接受变量,也可以接受张量,从而适应了图加速版本严格的类型限制。
第二种是打破函数作用域,把变量放到函数作用域之外;这里不推荐使用全局变量,可以把变量保存为对象的属性(例如 Keras 对象,或者像下面这样的普通 Python 类),如下:
class F:
    """Callable that keeps its variable as an attribute, outside the graph function.

    Storing the variable on the instance lets ``__call__`` run under
    ``tf.function`` while creating the variable only once, on the first call.
    """

    def __init__(self):
        # The variable is created lazily by the first __call__
        self._b = None

    @tf.function
    def __call__(self):
        """Return ``matmul(A, x) + b`` for two fixed 2x2 constants.

        Returns:
            The result of the ``tf.add`` operation named ``"result"``.
        """
        A = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
        x = tf.constant([[0, 10], [0, 0.5]])
        # Create the variable only when it does not exist yet, so later
        # calls reuse it instead of creating a new one.
        if self._b is None:
            self._b = tf.Variable(12.0)
        y = tf.add(tf.matmul(A, x), self._b, name="result")
        return y
# Instantiate the callable object and invoke it once.
# Note: this rebinds the name f, which earlier in this file refers to a function.
f=F()
f()