Single-output perceptron

Compute the gradient of an MSE loss with respect to the weights and bias of a single-output perceptron, using tf.GradientTape:
import tensorflow as tf

x = tf.random.normal([1, 3])    # one sample with 3 features
y = tf.constant([1])            # target value
w = tf.ones([3, 1])             # weights: 3 inputs -> 1 output
b = tf.ones([1])                # bias

with tf.GradientTape() as tape:
    tape.watch([w, b])          # w, b are plain tensors, so they must be watched explicitly
    logits = x @ w + b          # forward pass: [1, 3] @ [3, 1] + [1] -> [1, 1]
    loss = tf.reduce_mean(tf.losses.MSE(y, logits))

grads = tape.gradient(loss, [w, b])
grads
[<tf.Tensor: shape=(3, 1), dtype=float32, numpy=
 array([[ 8.862079 ],
        [-2.7388933],
        [ 4.958465 ]], dtype=float32)>,
 <tf.Tensor: shape=(1,), dtype=float32, numpy=array([4.7077913], dtype=float32)>]
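
The gradient shapes mirror the parameter shapes: dL/dw is [3, 1] like w, and dL/db is [1] like b (the exact values vary with the random x). In practice the explicit tape.watch call is avoided by declaring the parameters as tf.Variable, which GradientTape tracks automatically; a minimal sketch of the same computation:

import tensorflow as tf

x = tf.random.normal([1, 3])
y = tf.constant([1])
w = tf.Variable(tf.ones([3, 1]))   # trainable Variables are watched by default
b = tf.Variable(tf.ones([1]))

with tf.GradientTape() as tape:    # no tape.watch needed
    logits = x @ w + b
    loss = tf.reduce_mean(tf.losses.MSE(y, logits))

dw, db = tape.gradient(loss, [w, b])
print(dw.shape, db.shape)          # (3, 1) (1,)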
Multi-output perceptron

The same procedure extends to a perceptron with 3 outputs: the logits go through a softmax, and the labels are one-hot encoded before the MSE is taken:
x = tf.random.normal([2, 4])    # batch of 2 samples, 4 features each
w = tf.random.normal([4, 3])    # weights: 4 inputs -> 3 outputs
b = tf.zeros([3])               # one bias per output
y = tf.constant([2, 0])         # class labels for the 2 samples

with tf.GradientTape() as tape:
    tape.watch([w, b])
    prob = tf.nn.softmax(x @ w + b, axis=1)   # [2, 4] @ [4, 3] + [3] -> [2, 3]
    loss = tf.reduce_mean(tf.losses.MSE(tf.one_hot(y, depth=3), prob))

grads = tape.gradient(loss, [w, b])
grads
[<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
 array([[-0.02361509,  0.1223269 , -0.0987118 ],
        [ 0.01090372, -0.06179954,  0.05089583],
        [-0.00767319,  0.03826673, -0.03059354],
        [-0.00784626,  0.0357439 , -0.02789764]], dtype=float32)>,
 <tf.Tensor: shape=(3,), dtype=float32, numpy=array([-0.01442908,  0.0709531 , -0.05652402], dtype=float32)>]
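
MSE on softmax probabilities works, but for classification the usual loss is cross-entropy. A sketch of the same setup with tf.losses.categorical_crossentropy swapped in (our substitution, not part of the original example); from_logits=True applies a numerically stable softmax inside the loss, so the raw logits are passed directly:

with tf.GradientTape() as tape:
    tape.watch([w, b])
    logits = x @ w + b             # no explicit softmax here
    loss = tf.reduce_mean(
        tf.losses.categorical_crossentropy(tf.one_hot(y, depth=3),
                                           logits, from_logits=True))

grads_ce = tape.gradient(loss, [w, b])   # same shapes: [4, 3] and [3]

Back to the MSE gradients: each entry of the returned list can be inspected on its own.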
grads[0]
<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
array([[-0.02361509,  0.1223269 , -0.0987118 ],
       [ 0.01090372, -0.06179954,  0.05089583],
       [-0.00767319,  0.03826673, -0.03059354],
       [-0.00784626,  0.0357439 , -0.02789764]], dtype=float32)>
grads[1]
<tf.Tensor: shape=(3,), dtype=float32, numpy=array([-0.01442908, 0.0709531 , -0.05652402], dtype=float32)>
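
Each gradient matches its parameter's shape ([4, 3] for w, [3] for b), so a plain gradient-descent step is a direct scaled subtraction. A minimal sketch reusing w, b, and grads from above, with lr as a hypothetical learning rate:

lr = 0.01                # hypothetical learning rate, not from the original text
w = w - lr * grads[0]    # weight update: shape (4, 3) is preserved
b = b - lr * grads[1]    # bias update:   shape (3,)  is preserved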