Customize a layer's forward computation as well as the gradient it propagates in the backward pass.
If the layer takes a single tensor as input, a reference implementation:
import tensorflow as tf

input = tf.Variable([1.0], dtype=tf.float32)

@tf.custom_gradient
def clip_grad_layer(x):
    def grad(dy):
        # Clip the incoming upstream gradient to [-0.1, 0.1] in the backward pass.
        return tf.clip_by_value(dy, -0.1, 0.1)
    # Forward pass: scale the input by 3; grad replaces the default backprop rule.
    return 3 * tf.identity(x), grad

output_clip = clip_grad_layer(input)
grad_clip = tf.gradients(output_clip, input)

# Output without gradient clipping in the backward pass, for comparison:
output_normal = tf.identity(input)
grad_normal = tf.gradients(output_normal, input)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print("input:", sess.run(input))
    print("output_clipping:", sess.run(output_clip))
    print("output_no_clipping:", sess.run(output_normal))
    print("with clipping:", sess.run(grad_clip)[0])       # prints [0.1]
    print("without clipping:", sess.run(grad_normal)[0])  # prints [1.]
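For reference, a minimal sketch of the same clipping layer under TensorFlow 2.x eager execution (an assumption; the examples above target the TF 1.x Session API). tf.custom_gradient works unchanged there, and the gradient is read off a tf.GradientTape:

import tensorflow as tf  # TF 2.x, eager execution

@tf.custom_gradient
def clip_grad_layer(x):
    def grad(dy):
        # Same clipping rule, applied to the upstream gradient.
        return tf.clip_by_value(dy, -0.1, 0.1)
    return 3 * tf.identity(x), grad

v = tf.Variable([1.0])
with tf.GradientTape() as tape:
    out = clip_grad_layer(v)
print(tape.gradient(out, v))  # [0.1] rather than the unclipped [3.]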
If the layer takes multiple tensors as input, here with two, a reference implementation:
import tensorflow as tf

# We want to write a custom gradient for this function f1.
def f1(A, x):
    y = tf.matmul(A, x, name='y')
    return y

# For y = A x, the chain rule gives dz/dx = transpose(A) * dz/dy.
@tf.custom_gradient
def f2(A, x):
    y = f1(A, x)
    def grad(dzByDy):  # dz/dy = 2y arrives here correctly.
        dzByDx = tf.matmul(A, dzByDy, transpose_a=True)
        # Assume we never need the derivative with respect to the first
        # input A, so return None in its place.
        return None, dzByDx
    return y, grad

x = tf.constant([[1.], [0.]], name='x')
A = tf.constant([[1., 2.], [3., 4.]], name='A')
y = f2(A, x)
z = tf.reduce_sum(y * y, name='z')
g = tf.gradients(ys=z, xs=x)

with tf.Session() as sess:
    print(sess.run(g))
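As a sanity check, the printed gradient can be derived by hand: y = A x = [[1.], [3.]], so dz/dy = 2y = [[2.], [6.]], and dz/dx = transpose(A) * dz/dy = [[1*2 + 3*6], [2*2 + 4*6]] = [[20.], [28.]], which matches what sess.run(g) prints.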