Load the npz file and plot accuracy and loss
import numpy as np
import matplotlib.pyplot as plt

def plot_cifar_loss(npz_path):
    """Read a saved history .npz file and plot acc/val_acc and loss/val_loss curves."""
    data = np.load(npz_path)
    # The key names depend on how the metric was spelled at compile time
    # ('acc'/'val_acc' here; newer Keras versions use 'accuracy'/'val_accuracy').
    acc = data['acc']
    val_acc = data['val_acc']
    loss = data['loss']
    val_loss = data['val_loss']

    plt.figure(figsize=(16, 8))
    plt.subplot(1, 2, 1)
    plt.plot(acc, label='Training Accuracy')
    plt.plot(val_acc, label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')
    plt.subplot(1, 2, 2)
    plt.plot(loss, label='Training Loss')
    plt.plot(val_loss, label='Validation Loss')
    plt.legend(loc='upper right')
    plt.title('Training and Validation Loss')
    plt.show()
Save the training history, plot it, then evaluate on the test set
npz_path = 'plot_npz/cifar.npz'
npz_save(npz_path, history=history)
plot_cifar_loss(npz_path)
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
print('Test accuracy:', test_acc)
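npz_save is defined elsewhere in this series; as a reminder, a minimal sketch of such a helper, assuming it simply dumps every series in history.history into one .npz file, could look like this:

import numpy as np

def npz_save(npz_path, history=None):
    # Hypothetical minimal version. history.history is a dict such as
    # {'acc': [...], 'val_acc': [...], 'loss': [...], 'val_loss': [...]};
    # np.savez stores each list as a named array in a single .npz file.
    np.savez(npz_path, **history.history)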
Put compile and fit in one function to support resumable (checkpointed) training
def compile_and_fit_v1(model, dataset_modelname=None, max_epochs=20,
                       train_ds=None, test_ds=None, steps_per_epoch=3):
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    print(len(model.trainable_variables), 'trainable variables')
    model.summary()

    # Restore the most recent checkpoint if one exists.
    checkpoint_path = 'checkpoints/' + dataset_modelname
    checkpoint = tf.train.Checkpoint(myAwesomeModel=model)
    checkpoint.restore(tf.train.latest_checkpoint(checkpoint_path))

    # Use tf.train.CheckpointManager to keep at most 3 checkpoints around.
    manager = tf.train.CheckpointManager(checkpoint, directory=checkpoint_path,
                                         max_to_keep=3)
    history = model.fit(train_ds, epochs=max_epochs, validation_data=test_ds,
                        steps_per_epoch=steps_per_epoch)
    path = manager.save(checkpoint_number=20)
    print('Model saved at %s' % path)
    return history
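On the first run there is nothing to restore: tf.train.latest_checkpoint returns None and checkpoint.restore becomes a no-op, so the same function works both for fresh training and for resuming. A quick way to check, using a hypothetical 'cifar_cnn' name for the checkpoint directory:

latest = tf.train.latest_checkpoint('checkpoints/cifar_cnn')  # hypothetical path
print(latest)  # None on the first run; e.g. 'checkpoints/cifar_cnn/ckpt-20' afterwards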
Split off a validation set with take and skip
Do the mapping after shuffle and before batch, so that the training set gets thoroughly shuffled. Be careful not to call cache at the split stage; otherwise the memory is never released automatically.
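The split itself is purely positional: take keeps the first N elements and skip drops them. A toy illustration:

import tensorflow as tf

toy = tf.data.Dataset.range(10)
val = toy.take(2)    # the first 2 elements: 0, 1
train = toy.skip(2)  # everything after them: 2..9
print(list(val.as_numpy_iterator()))    # [0, 1]
print(list(train.as_numpy_iterator()))  # [2, 3, ..., 9]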
def prepare_ds(image_label_ds, image_count):
    BATCH_SIZE = 32
    AUTOTUNE = tf.data.experimental.AUTOTUNE

    # 80/20 train/validation split.
    N_VALIDATION = int(image_count * 0.2)
    N_TRAIN = image_count - N_VALIDATION
    print(N_TRAIN, 'for train,', N_VALIDATION, 'for validation')

    validate_ds = image_label_ds.take(N_VALIDATION)
    train_ds = image_label_ds.skip(N_VALIDATION).take(N_TRAIN)

    # Use a shuffle buffer as large as the training set so the data is
    # fully shuffled.
    ds = train_ds.shuffle(buffer_size=N_TRAIN)
    ds = ds.map(load_and_preprocess_from_path_label)
    ds = ds.repeat()
    ds = ds.batch(BATCH_SIZE)
    # `prefetch` lets the dataset fetch batches in the background while
    # the model is training.
    ds = ds.prefetch(buffer_size=AUTOTUNE)

    vs = validate_ds.map(load_and_preprocess_from_path_label)
    vs = vs.batch(BATCH_SIZE)
    vs = vs.prefetch(buffer_size=AUTOTUNE)

    # Because the training set repeats forever, fit() needs an explicit
    # steps_per_epoch.
    steps_per_epoch = tf.math.ceil(N_TRAIN / BATCH_SIZE).numpy()
    print(steps_per_epoch, 'steps per epoch')
    return ds, vs, steps_per_epoch
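Putting the pieces together — assuming image_label_ds and image_count come from an earlier path-loading step, and 'flower_cnn' is just an illustrative checkpoint name:

ds, vs, steps_per_epoch = prepare_ds(image_label_ds, image_count)
history = compile_and_fit_v1(model, dataset_modelname='flower_cnn',
                             max_epochs=20, train_ds=ds, test_ds=vs,
                             steps_per_epoch=steps_per_epoch)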
Summary
If you use repeat, you must specify steps_per_epoch when calling fit:
history = model.fit(train_ds, epochs=max_epochs, validation_data=test_ds,
                    steps_per_epoch=steps_per_epoch)