TensorFlow学习归纳keras.callbacks（未完）

最新推荐文章于 2023-06-06 22:51:43 发布

未生无

最新推荐文章于 2023-06-06 22:51:43 发布

阅读量352

点赞数

分类专栏： tensorflow

原文链接：https://github.com/ChihebTrabelsi/deep_complex_networks

版权

tensorflow 专栏收录该内容

19 篇文章 0 订阅

订阅专栏

总结一下目前所学的关于keras.callbacks中的回调函数。
参考https://github.com/ChihebTrabelsi/deep_complex_networks

回调函数是一组在训练的特定阶段被调用的函数集，你可以使用回调函数来观察训练过程中网络内部的状态和统计信息。
虽然我们称之为回调“函数”，但事实上Keras的回调函数是一个类。

keras.callbacks内置的回调类

keras.callbacks.Callback()

这是回调函数的抽象类，定义新的回调函数必须继承自该类

属性和参数见https://keras.io/zh/callbacks/

keras.callbacks.LearningRateScheduler(schedule)

该回调函数是用于动态设置学习率
参数：
● schedule：函数，该函数以epoch号为参数（从0算起的整数），返回一个新学习率（浮点数）
示例：

def schedule(epoch):
	"""Return the learning rate to use for the given epoch.

	The rate follows a fixed piecewise-constant plan:

	    epochs   0 ..   9  -> 0.01
	    epochs  10 ..  99  -> 0.1
	    epochs 100 .. 119  -> 0.01
	    epochs 120 .. 149  -> 0.001
	    epochs 150 and up  -> 0.0001

	On the first epoch of each phase the new rate is logged to the
	"train" logger.

	Args:
	    epoch: Zero-based epoch number.

	Returns:
	    The learning rate (float) for that epoch.

	Raises:
	    ValueError: If ``epoch`` is negative (or otherwise not comparable
	        to any phase), instead of failing on an unbound local.
	"""
	# (first epoch of the phase, learning rate), in ascending order.
	phases = (
		(0,   0.01),
		(10,  0.1),
		(100, 0.01),
		(120, 0.001),
		(150, 0.0001),
	)
	# Walk from the last phase backwards; the first one that has already
	# started is the phase this epoch belongs to.
	for first_epoch, lrate in reversed(phases):
		if epoch >= first_epoch:
			if epoch == first_epoch:
				L.getLogger("train").info("Current learning rate value is "+str(lrate))
			return lrate
	raise ValueError("epoch must be a non-negative number, got {!r}".format(epoch))
# Learning-rate scheduler callback built from the `schedule` function above.
scheduler = LearningRateScheduler(schedule)

自定义的回调函数，继承keras.callbacks.Callback（）

TestErrorCallback（Callback）

在每一轮（epoch）结束时，评估模型在测试集上的性能表现

# Also evaluate performance on test set at each epoch end.
#

class TestErrorCallback(Callback):
	"""Evaluate the model on a test set at the end of every epoch.

	The test loss and accuracy of each epoch are appended to
	``loss_history`` and ``acc_history`` and logged to the "train" logger
	together with the training/validation metrics found in ``logs``.
	"""

	def __init__(self, test_data):
		"""Remember the test set.

		Args:
		    test_data: An ``(x, y)`` pair; it is unpacked and handed to
		        ``self.model.evaluate`` on every epoch end.
		"""
		# Run the base-class initialiser too; the original skipped it.
		super(TestErrorCallback, self).__init__()
		self.test_data    = test_data
		self.loss_history = []
		self.acc_history  = []

	def on_epoch_end(self, epoch, logs=None):
		"""Evaluate on the test set, record the result and log all metrics.

		Args:
		    epoch: Zero-based epoch index (logged as ``epoch+1``).
		    logs: Metrics dict for this epoch. Missing entries (for
		        example the ``val_*`` keys when there is no validation
		        data) are logged as ``None`` instead of raising KeyError.
		"""
		logs = logs or {}
		x, y = self.test_data

		L.getLogger("train").info("Epoch {:5d} Evaluating on test set...".format(epoch+1))
		# NOTE(review): unpacking into two values assumes the model was
		# compiled with exactly one metric (accuracy) besides the loss.
		test_loss, test_acc = self.model.evaluate(x, y, verbose=0)
		L.getLogger("train").info("                                      complete.")

		self.loss_history.append(test_loss)
		self.acc_history.append(test_acc)

		L.getLogger("train").info("Epoch {:5d} train_loss: {}, train_acc: {}, val_loss: {}, val_acc: {}, test_loss: {}, test_acc: {}".format(
		                          epoch+1,
		                          logs.get("loss"),     logs.get("acc"),
		                          logs.get("val_loss"), logs.get("val_acc"),
		                          test_loss,            test_acc))

LrDivisor（Callback）

当验证集准确率（val_acc）连续 patience 轮基本不变（变化不超过 epsilon），或者到达 epoch_checkpoints 中指定的轮次时，将学习率除以 division_cst

# LrDivisor. To use:
#
# lrDivisorCb     = LrDivisor(patience          = float(50000),
#                             division_cst      = 10.0,
#                             verbose           = 1,
#                             epoch_checkpoints = {75})
#

class LrDivisor(Callback):
	"""Divide the optimizer's learning rate at fixed epochs or on a plateau.

	At the end of an epoch the learning rate is divided by
	``division_cst`` when either

	* the 1-based epoch number is listed in ``epoch_checkpoints``, or
	* ``val_acc`` has stayed within ``epsilon`` of the previous epoch's
	  value for ``patience`` consecutive epochs.
	"""

	def __init__(self, patience=float(50000), division_cst=10.0, epsilon=1e-03, verbose=1, epoch_checkpoints={41, 61}):
		"""Configure the callback.

		Args:
		    patience: Number of consecutive epochs with (almost) unchanged
		        ``val_acc`` after which the learning rate is divided.
		    division_cst: Factor the learning rate is divided by.
		    epsilon: Tolerance within which two consecutive ``val_acc``
		        values count as "unchanged".
		    verbose: If greater than 0, log every division.
		    epoch_checkpoints: 1-based epoch numbers at which the learning
		        rate is divided unconditionally.
		"""
		# The original called super(Callback, self), which skips
		# Callback.__init__ entirely.
		super(LrDivisor, self).__init__()
		self.patience = patience
		# Copy, so the shared default set (or the caller's collection) is
		# never aliased by this instance.
		self.checkpoints = set(epoch_checkpoints)
		self.wait = 0
		self.previous_score = 0.
		self.division_cst = division_cst
		self.epsilon = epsilon
		self.verbose = verbose
		self.iterations = 0

	def on_batch_begin(self, batch, logs=None):
		"""Count the batches seen so far in ``self.iterations``."""
		self.iterations += 1

	def on_epoch_end(self, epoch, logs=None):
		"""Divide the learning rate if a checkpoint or a plateau is reached.

		Args:
		    epoch: Zero-based epoch index.
		    logs: Metrics dict for this epoch; only ``val_acc`` is read.
		        If it is absent the plateau check is skipped for this
		        epoch, but checkpoint epochs still trigger a division.
		"""
		logs = logs or {}
		current_score = logs.get('val_acc')
		divide = (epoch + 1) in self.checkpoints
		if not divide and current_score is not None:
			lower = self.previous_score - self.epsilon
			upper = self.previous_score + self.epsilon
			if lower <= current_score <= upper:
				# Validation accuracy is essentially unchanged.
				self.wait += 1
				# >= rather than == so a non-integer patience still fires.
				if self.wait >= self.patience:
					divide = True
			else:
				self.wait = 0
		if divide:
			optimizer_lr = self.model.optimizer.lr
			# Read through the backend instead of the Theano-only
			# `.get_value()` so this works on any Keras backend.
			K.set_value(optimizer_lr, K.get_value(optimizer_lr) / self.division_cst)
			self.wait = 0
			if self.verbose > 0:
				L.getLogger("train").info("Current learning rate is divided by"+str(self.division_cst) + ' and his values is equal to: ' + str(K.get_value(optimizer_lr)))
		if current_score is not None:
			# Keep the last known score when this epoch reported none.
			self.previous_score = current_score

TrainValHistory（Callback）

把每一轮训练的性能记录下来

#
# Keep a history of the validation performance.
#

class TrainValHistory(Callback):
	"""Record the training and validation loss/accuracy of every epoch.

	After training, ``train_loss``, ``train_acc``, ``val_loss`` and
	``val_acc`` each hold one entry per finished epoch (``None`` where
	the metric was not present in ``logs``).
	"""

	def __init__(self):
		"""Start with empty histories."""
		# Run the base-class initialiser too; the original skipped it.
		super(TrainValHistory, self).__init__()
		self.train_loss = []
		self.train_acc  = []
		self.val_loss   = []
		self.val_acc    = []

	def on_epoch_end(self, epoch, logs=None):
		"""Append this epoch's metrics to the four history lists.

		Args:
		    epoch: Zero-based epoch index (unused).
		    logs: Metrics dict for this epoch; the keys ``loss``, ``acc``,
		        ``val_loss`` and ``val_acc`` are read.
		"""
		logs = logs or {}
		self.train_loss.append(logs.get('loss'))
		self.train_acc.append(logs.get('acc'))
		self.val_loss.append(logs.get('val_loss'))
		self.val_acc.append(logs.get('val_acc'))

SaveLastModel（Callback）

每隔 period 个 epoch 保存一个 checkpoint（同时保存模型结构的 YAML 文件），并让符号链接 ModelChkpt.hdf5 始终指向最新保存的 checkpoint

#
# Save checkpoints.
#

class SaveLastModel(Callback):
	"""Periodically save a checkpoint and keep a link to the newest one.

	Every ``period`` epochs the model is written to
	``<workdir>/chkpts/ModelChkpt<epoch>.hdf5`` (plus a ``.yaml`` file with
	the architecture), and the symlink ``<workdir>/chkpts/ModelChkpt.hdf5``
	is re-pointed at that newest HDF5 file.
	"""

	def __init__(self, workdir, period=10):
		"""Prepare the checkpoint directory.

		Args:
		    workdir: Working directory; checkpoints go into its
		        ``chkpts`` sub-directory, which is created if missing.
		    period: Save a checkpoint every this many epochs.
		"""
		# Run the base-class initialiser too; the original skipped it.
		super(SaveLastModel, self).__init__()
		self.workdir          = workdir
		self.chkptsdir        = os.path.join(self.workdir, "chkpts")
		if not os.path.isdir(self.chkptsdir):
			# makedirs also creates `workdir` itself when it does not
			# exist yet; plain mkdir would fail in that case.
			os.makedirs(self.chkptsdir)
		self.period_of_epochs = period
		# Stable name that always refers to the most recent checkpoint.
		self.linkFilename     = os.path.join(self.chkptsdir, "ModelChkpt.hdf5")

	def on_epoch_end(self, epoch, logs=None):
		"""Save a checkpoint if ``epoch+1`` is a multiple of the period.

		Args:
		    epoch: Zero-based epoch index.
		    logs: Metrics dict for this epoch (unused).
		"""
		if (epoch + 1) % self.period_of_epochs != 0:
			return

		# Filenames
		baseHDF5Filename = "ModelChkpt{:06d}.hdf5".format(epoch+1)
		baseYAMLFilename = "ModelChkpt{:06d}.yaml".format(epoch+1)
		hdf5Filename     = os.path.join(self.chkptsdir, baseHDF5Filename)
		yamlFilename     = os.path.join(self.chkptsdir, baseYAMLFilename)

		# YAML: architecture only.
		# NOTE(review): Model.to_yaml() is not available in recent
		# TensorFlow/Keras releases - confirm against the version in use.
		yamlModel = self.model.to_yaml()
		with open(yamlFilename, "w") as yamlFile:
			yamlFile.write(yamlModel)

		# HDF5: one file per saved checkpoint, tagged with the epoch at
		# which training should resume (epoch+1).
		KM.save_model(self.model, hdf5Filename)
		with H.File(hdf5Filename, "r+") as f:
			f.require_dataset("initialEpoch", (), "uint64", True)[...] = int(epoch+1)
			f.flush()  # push the buffered write out to disk

		# Create a temporary symlink pointing at the new HDF5 file, then
		# rename it over the old link so the link is replaced in one step.
		tmpLinkFilename = self.linkFilename+".rename"
		if os.path.lexists(tmpLinkFilename):
			# A temp link left behind by an interrupted run would make
			# os.symlink fail on every later checkpoint.
			os.remove(tmpLinkFilename)
		os.symlink(baseHDF5Filename, tmpLinkFilename)
		os.rename(tmpLinkFilename, self.linkFilename)

		# Print
		L.getLogger("train").info("Saved checkpoint to {:s} at epoch {:5d}".format(hdf5Filename, epoch+1))

SaveBestModel（Callback）

保存目前最佳模型

#
# Save record-best models.
#

class SaveBestModel(Callback):
	"""Save the model every time it reaches a new best accuracy.

	Each record-best model is written to ``<workdir>/best`` as
	``Bestmodel_<epoch>_<acc>_<loss>.hdf5`` plus a ``.yaml`` file with the
	architecture.

	The accuracy compared is the validation accuracy (``val_acc``) when
	the epoch's ``logs`` contain it; otherwise the training accuracy
	(``acc``) is used. The original read ``loss``/``acc`` into variables
	named ``val_loss``/``val_acc``, so "best" was silently judged on
	training accuracy.
	"""

	def __init__(self, workdir):
		"""Prepare the output directory.

		Args:
		    workdir: Working directory; models go into its ``best``
		        sub-directory, which is created if missing.
		"""
		# Run the base-class initialiser too; the original skipped it.
		super(SaveBestModel, self).__init__()
		self.workdir   = workdir
		self.bestdir   = os.path.join(self.workdir, "best")
		if not os.path.isdir(self.bestdir):
			# makedirs also creates `workdir` itself when it does not
			# exist yet; plain mkdir would fail in that case.
			os.makedirs(self.bestdir)
		self.best_acc  = 0
		self.best_loss = +np.inf

	def on_epoch_end(self, epoch, logs=None):
		"""Save the model if this epoch's accuracy beats the best so far.

		Args:
		    epoch: Zero-based epoch index (filenames use ``epoch+1``).
		    logs: Metrics dict for this epoch. ``val_acc``/``val_loss``
		        are used when ``val_acc`` is present, else ``acc``/``loss``.
		"""
		logs = logs or {}
		# Prefer validation metrics; fall back to the training metrics
		# when no validation data was supplied.
		prefix   = 'val_' if 'val_acc' in logs else ''
		val_acc  = logs.get(prefix + 'acc')
		val_loss = logs.get(prefix + 'loss', float('nan'))
		# `not (a > b)` rather than `a <= b` so a NaN accuracy never
		# counts as an improvement.
		if val_acc is None or not (val_acc > self.best_acc):
			return

		self.best_acc  = val_acc
		self.best_loss = val_loss

		# Filenames
		hdf5Filename = os.path.join(self.bestdir, "Bestmodel_{:06d}_{:.4f}_{:.4f}.hdf5".format(epoch+1, val_acc, val_loss))
		yamlFilename = os.path.join(self.bestdir, "Bestmodel_{:06d}_{:.4f}_{:.4f}.yaml".format(epoch+1, val_acc, val_loss))

		# YAML: architecture only.
		# NOTE(review): Model.to_yaml() is not available in recent
		# TensorFlow/Keras releases - confirm against the version in use.
		yamlModel = self.model.to_yaml()
		with open(yamlFilename, "w") as yamlFile:
			yamlFile.write(yamlModel)

		# HDF5: the saved model, tagged with the epoch at which training
		# should resume (epoch+1).
		KM.save_model(self.model, hdf5Filename)
		with H.File(hdf5Filename, "r+") as f:
			f.require_dataset("initialEpoch", (), "uint64", True)[...] = int(epoch+1)
			f.flush()  # push the buffered write out to disk

		# Print
		L.getLogger("train").info("Saved best model to {:s} at epoch {:5d}".format(hdf5Filename, epoch+1))