import tensorflow as tf
@tf.RegisterGradient("CustomClipGrad")
def _clip_grad(unused_op, grad):
    # grad is the gradient coming from the layer above, i.e. the gradient
    # w.r.t. output_clip; by the chain rule it is multiplied on the way back.
    print(unused_op.inputs)
    print(unused_op.inputs[0])
    print(unused_op.inputs[1])  # the Mul op has two inputs
    # Debugging only: evaluate the op's inputs while the gradient graph is built.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(sess.run(unused_op.inputs[0]))  # the variable w1
        print(sess.run(unused_op.inputs[1]))  # the constant factor, 13
    return grad * 10, None
    # Mul has two inputs, so two gradients must be returned: tf.gradients
    # differentiates w.r.t. every input, and None stands in for the constant
    # factor (the multiplication coefficient), which needs no gradient.
    # return grad, None
    # return tf.clip_by_value(grad, -0.1, 0.1), None  # clipping variant (also needs the None)
    # return grad[0]*10, grad[1]*10
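
# For contrast, a simplified sketch of what a pass-through Mul gradient looks
# like under the product rule (ignoring broadcasting, which TensorFlow's real
# Mul gradient handles with reductions); the name "IllustrativeMulGrad" is
# made up here for illustration:
@tf.RegisterGradient("IllustrativeMulGrad")
def _illustrative_mul_grad(op, grad):
    x, y = op.inputs
    return grad * y, grad * x  # one gradient for each of the two inputs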
input = tf.Variable([3.0], dtype=tf.float32, name='w1')
g = tf.get_default_graph()
with g.gradient_override_map({"Mul": "CustomClipGrad"}):
    output_clip = tf.multiply(input, 13)
    # output_clip = tf.identity(input, name='Identity')
output_2 = output_clip * 2.0
grad_clip = tf.gradients(output_2, input)
# Backprop starts at output_2 with an implicit gradient of 1 (like an identity)
# and is multiplied backward through each op by the chain rule.
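# Worked by hand (with output_2 built outside the override map, as above):
# d(output_2)/d(output_clip) = 2, and the custom rule then returns 2 * 10 = 20
# for the variable, so grad_clip should evaluate to [20.] rather than the
# unmodified 2 * 13 = 26.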
# output without gradient clipping in the backwards pass for comparison:
output = tf.identity(input)
grad = tf.gradients(output, input)
print(output_clip)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print("with clipping:", sess.run(grad_clip)[0])
    print("without clipping:", sess.run(grad)[0])