optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
grads_and_vars = optimizer.compute_gradients(self.loss)
for i, (g, v) in enumerate(grads_and_vars):
    if g is not None:
        grads_and_vars[i] = (tf.clip_by_norm(g, FLAGS.norm_clip), v)
self.train_op = optimizer.apply_gradients(grads_and_vars, global_step=self.global_step)
As shown above, you only need to insert a few lines between computing the gradients (compute_gradients) and applying the update (apply_gradients): each non-None gradient is clipped to the norm FLAGS.norm_clip before apply_gradients runs.
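A common variant of the same pattern clips all gradients jointly by their global norm rather than clipping each tensor independently. The sketch below is only an illustration of that alternative, assuming the same TF 1.x graph-mode setup and reusing FLAGS.norm_clip, self.loss, and self.global_step from the snippet above; it is not the code used in this post.

import tensorflow as tf

optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
# Split the (gradient, variable) pairs into two parallel lists.
grads, variables = zip(*optimizer.compute_gradients(self.loss))
# Rescale all gradients together so their combined norm is at most
# FLAGS.norm_clip; None entries (unconnected variables) are ignored.
clipped_grads, _ = tf.clip_by_global_norm(grads, FLAGS.norm_clip)
self.train_op = optimizer.apply_gradients(
    zip(clipped_grads, variables), global_step=self.global_step)

Global-norm clipping preserves the direction of the overall gradient vector, whereas per-tensor tf.clip_by_norm (as in the snippet above) can change the relative scale between variables.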