利用tf.GradientTape() 梯度带,保存模型.
版权声明:本文为CSDN博主「hjxu2016」的原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/hjxu2016/article/details/105222369
# --- Hyper-parameter configuration --------------------------------------
total_num = 25000              # number of examples in train.record
learning_rate = 0.001          # Adam step size
test_step = 1000               # NOTE(review): unused here and later shadowed by def test_step()
saved_step = 5000              # NOTE(review): unused in this chunk — TODO confirm intent
EPOCHS = 10                    # full passes over the training data
batch_size = 16                # examples per mini-batch
display_step = 10              # log training metrics every N steps
training_step = total_num // batch_size   # optimizer updates per epoch
train_record_path = "./train.record"
test_record_path = "./test.record"
# Reading the TFRecord files yields a tf.data.Dataset of every serialized
# Example that was previously written to them.
train_dataset = tf.data.TFRecordDataset(train_record_path)
test_dataset = tf.data.TFRecordDataset(test_record_path)
# Schema used below to parse each serialized Example record.
feature_description = {
'image/filename': tf.io.FixedLenFeature([], tf.string),
'image/class': tf.io.FixedLenFeature([], tf.int64),
'image/encoded': tf.io.FixedLenFeature([], tf.string)
}
#
def parese_example(serialized_example):
    """Parse one serialized TFRecord Example into an (image, label) pair.

    (Name keeps the original 'parese' typo because the map() calls below
    reference it.)

    Args:
        serialized_example: scalar string tensor holding one tf.train.Example.

    Returns:
        Tuple (image, label): image is a float32 tensor of shape
        [224, 224, 3]; label is the int64 'image/class' feature.
    """
    features = tf.io.parse_single_example(serialized_example, feature_description)
    image = tf.io.decode_jpeg(features['image/encoded'])       # decode JPEG bytes
    image = tf.image.resize_with_crop_or_pad(image, 224, 224)  # center crop/pad to 224x224
    image = tf.reshape(image, [224, 224, 3])                   # pin static shape (assumes 3 channels)
    image = tf.cast(image, tf.float32)
    return image, features['image/class']
# --- Input pipelines, model, loss, metrics, optimizer --------------------
train_dataset = train_dataset.map(parese_example)
test_dataset = test_dataset.map(parese_example)
# Training pipeline: infinite repeat -> shuffle -> batch -> prefetch.
train_dataset = train_dataset.repeat().shuffle(5000).batch(batch_size).prefetch(3)
test_dataset = test_dataset.repeat().shuffle(5000).batch(batch_size, drop_remainder=True)
# ResNet50 backbone (randomly initialized, no classification head),
# global average pooling, then a 2-way softmax head.
ResNet50 = tf.keras.applications.ResNet50(weights=None, include_top=False)
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
fc = tf.keras.layers.Dense(2, activation="softmax")
model = tf.keras.Sequential([ResNet50, global_average_layer, fc])
# Loss: integer class-id labels vs. softmax probability predictions.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
# Running metrics, accumulated across steps until explicitly reset.
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
# Fix: use the learning_rate constant instead of a duplicated 0.001 literal.
optimizer = tf.keras.optimizers.Adam(learning_rate)
def train_step(images, labels):
    """Run one optimization step on a single mini-batch.

    Forward pass under GradientTape, backprop through every trainable
    variable, then fold the batch loss/accuracy into the running metrics.

    Args:
        images: float32 batch, shape [batch, 224, 224, 3].
        labels: int64 batch of class ids.
    """
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)                      # accumulate batch loss into Mean metric
    train_accuracy(labels, predictions)
    print("train..")                      # NOTE(review): prints every step; consider removing
def test_step(images, labels):
    """Evaluate the model on one mini-batch (no weight updates).

    NOTE(review): this def shadows the module-level ``test_step = 1000``
    setting defined at the top of the script.

    Args:
        images: float32 batch, shape [batch, 224, 224, 3].
        labels: int64 batch of class ids.
    """
    # Explicit training=False keeps BatchNorm in inference mode.
    predictions = model(images, training=False)
    t_loss = loss_object(labels, predictions)
    test_loss(t_loss)
    test_accuracy(labels, predictions)
# Create the checkpoint object and output folder once, not per epoch.
root = tf.train.Checkpoint(optimizer=optimizer, model=model)
saved_folder = "./ckpt2Model"
if not os.path.exists(saved_folder):
    os.mkdir(saved_folder)

for epoch in range(EPOCHS):
    # Reset accumulators so each epoch reports its own statistics rather
    # than a running average over all previous epochs.
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()
    # train_dataset is .repeat()ed (infinite); the loop must be bounded
    # explicitly or enumerate() would never terminate.
    for step, (batch_x, batch_y) in enumerate(train_dataset, 1):
        train_step(batch_x, batch_y)
        if step % display_step == 0:
            template = '=> train: step {}, Loss: {:.4}, Accuracy: {:.2%}'
            print(template.format(step,
                                  train_loss.result(),
                                  train_accuracy.result()))
        if step >= training_step:   # one full pass over the training data
            break
    # test_dataset is also infinite; evaluate on a bounded number of batches.
    for step, (batch_x, batch_y) in enumerate(test_dataset.take(1000), 1):
        test_step(batch_x, batch_y)
    template = '=> Epoch {}, Test Loss: {:.4}, Test Accuracy: {:.2%}'
    print(template.format(epoch + 1,
                          test_loss.result(),
                          test_accuracy.result()))
    # Save optimizer + model parameters at the end of every epoch.
    checkpoint_prefix = (saved_folder + "/epoch:%i_acc") % (epoch + 1)
    root.save(checkpoint_prefix)
提取模型
注:Checkpoint只用于保存模型的参数,不保存模型的计算过程,因此一般用于在具有模型源代码的情况下恢复之前训练好的模型参数。如果需要导出模型(无需源代码也能运行模型),参考 SaveModel
# train.py -- training phase: save parameters with tf.train.Checkpoint.

model = MyModel()
# Instantiate a Checkpoint targeting the model (add the optimizer here as
# well if its state should also be saved).
checkpoint = tf.train.Checkpoint(myModel=model)
# ... (training code) ...
# After training, persist the parameters to file (this can also be done
# periodically during training).
checkpoint.save('./save/model.ckpt')
# test.py -- inference phase: restore parameters from the checkpoint.

model = MyModel()
# The keyword key ('myModel') must match the one used when saving.
checkpoint = tf.train.Checkpoint(myModel=model)
checkpoint.restore(tf.train.latest_checkpoint('./save'))  # load latest weights
# ... (model usage code) ...
import argparse

import numpy as np
import tensorflow as tf

from zh.model.mnist.mlp import MLP
from zh.model.utils import MNISTLoader

# Command-line configuration for the MNIST train/test demo.
parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('--mode', default='train', help='train or test')
parser.add_argument('--num_epochs', default=1)
parser.add_argument('--batch_size', default=50)
parser.add_argument('--learning_rate', default=0.001)
args = parser.parse_args()
data_loader = MNISTLoader()
def train():
    """Train the MLP on MNIST, checkpointing the weights every 100 batches."""
    model = MLP()
    optimizer = tf.keras.optimizers.Adam(learning_rate=args.learning_rate)
    num_batches = int(data_loader.num_train_data // args.batch_size * args.num_epochs)
    # The keyword key ('myAwesomeModel') must match the one used in test().
    checkpoint = tf.train.Checkpoint(myAwesomeModel=model)
    for batch_index in range(1, num_batches + 1):
        X, y = data_loader.get_batch(args.batch_size)
        with tf.GradientTape() as tape:
            y_pred = model(X)
            loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
            loss = tf.reduce_mean(loss)
            print("batch %d: loss %f" % (batch_index, loss.numpy()))
        grads = tape.gradient(loss, model.variables)
        optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
        if batch_index % 100 == 0:  # save once every 100 batches
            path = checkpoint.save('./save/model.ckpt')
            print("model saved to %s" % path)
def test():
    """Restore the latest checkpoint into a fresh MLP and report accuracy."""
    model_to_be_restored = MLP()
    # Instantiate a Checkpoint whose restore target is the new model; the
    # key ('myAwesomeModel') must match the one used when saving.
    checkpoint = tf.train.Checkpoint(myAwesomeModel=model_to_be_restored)
    checkpoint.restore(tf.train.latest_checkpoint('./save'))  # load parameters
    y_pred = np.argmax(model_to_be_restored.predict(data_loader.test_data), axis=-1)
    print("test accuracy: %f" % (sum(y_pred == data_loader.test_label) / data_loader.num_test_data))
if __name__ == '__main__':
    # Dispatch on the --mode flag ('train' or 'test').
    if args.mode == 'train':
        train()
    if args.mode == 'test':
        test()