本文只计算单个权重 W0_00 的梯度，因此只画出涉及 W0_00 的前向（forward）路线。BP（反向传播）算法的作用，就是更高效地计算神经网络各参数的梯度。
定义网络
- z1=x0*w0_00
- a1=sigmoid(z1)
- z2=a1*w1_00
- a2=sigmoid(z2)
- p1=a2
- z3=a1*w1_01
- a3=sigmoid(z3)
- p2=a3
import numpy as np
# Weight matrices for the two layers (2 inputs -> 2 hidden -> 2 outputs).
W0 = np.array([[0.1,0.8],[0.4,0.6]])
W1 = np.array([[0.1,0.8],[0.4,0.6]])
# Single training sample (1x2) and its target output (1x2).
X = np.array([[0.35,0.9]])
y = np.array([[0.5, 0.5]])
def sigmoid(x, deriv = False):
    """Logistic sigmoid, or its derivative.

    Args:
        x: scalar or NumPy array. IMPORTANT: when ``deriv=True``, ``x`` must
           already be a sigmoid *output* (an activation ``a``), because the
           derivative is expressed as s'(z) = s(z) * (1 - s(z)) = a * (1 - a).
        deriv: if True, return the derivative in terms of the activation ``x``;
           otherwise return the sigmoid of ``x``.
    """
    if deriv:  # idiomatic truth test instead of `== True`
        return x*(1-x)
    else:
        return 1 / (1 + np.exp(-x))
# ---- Forward pass ----
z1 = np.dot(X, W0)      # hidden pre-activation, shape (1, 2)
a1 = sigmoid(z1)        # hidden activation
z2 = np.dot(a1, W1)     # output pre-activation
a2 = sigmoid(z2)        # output activation
y_hat = a2
error = (y_hat - y)**2  # per-output squared error

# ---- Backward pass: chain rule for a single weight W0[0][0] ----
# d(error)/d(output activation) for the two outputs (a2 is called a3 in the prose).
error_a2_delta = 2 * (y_hat[0][0] - y[0][0])
error_a3_delta = 2 * (y_hat[0][1] - y[0][1])
# Through the output sigmoids. NOTE: the original code called an undefined
# `fun(...)` here; the intended function is `sigmoid(..., True)`, which takes
# the activation value (not the pre-activation).
error_z2_delta = error_a2_delta * sigmoid(a2[0][0], True)
error_z3_delta = error_a3_delta * sigmoid(a2[0][1], True)
# Both outputs depend on a1[0][0] through row 0 of W1, so sum both paths.
error_a1_delta = error_z2_delta * W1[0][0] + error_z3_delta * W1[0][1]
# Through the hidden sigmoid.
error_z1_delta = error_a1_delta * sigmoid(a1[0][0], True)
# Finally dz1/dW0[0][0] = x0.
W0_delta_0 = error_z1_delta * X[0][0]
使用 TensorFlow 2 实现：借助 GradientTape 自动微分，验证上面手算的反向传播结果。
import tensorflow as tf
# Same weights as the NumPy version, but as trainable tf.Variables so the
# GradientTape tracks them automatically.
W0 = tf.Variable(np.array([[0.1,0.8],[0.4,0.6]]))
W1 = tf.Variable(np.array([[0.1,0.8],[0.4,0.6]]))
X = np.array([[0.35,0.9]]) # input sample (1x2)
y = np.array([[0.5, 0.5]]) # target output (1x2)
def net(W0, W1, X):
    """Forward pass of the two-layer sigmoid network.

    Computes sigmoid(sigmoid(X @ W0) @ W1) — the same forward pass as the
    manual NumPy version above.

    Args:
        W0: first-layer weight matrix (2x2).
        W1: second-layer weight matrix (2x2).
        X:  input batch, shape (1, 2).

    Returns:
        Output activations, shape (1, 2).
    """
    layer1 = tf.matmul(X, W0)
    a1 = tf.nn.sigmoid(layer1)
    # (removed a leftover debug print of a1 here)
    layer2 = tf.matmul(a1, W1)
    a2 = tf.nn.sigmoid(layer2)
    return a2
def loss(y_hat, y):
    """Elementwise squared error between prediction and target."""
    diff = y_hat - y
    return diff ** 2
# Record the forward pass; persistent=True lets us call t.gradient() twice.
with tf.GradientTape(persistent=True) as t:
    z1 = tf.matmul(X, W0)
    a1 = tf.nn.sigmoid(z1)
    z2 = tf.matmul(a1, W1)
    a2 = tf.nn.sigmoid(z2)
    # Variables W0/W1 are watched automatically; watch the intermediate
    # tensors here — as soon as they exist and before any gradient call —
    # so we can also differentiate with respect to them. (The original
    # called watch() as the tape's last statement, where it is redundant
    # for the Variables and easy to misread.)
    t.watch([z1, a1, z2, a2])
    l = loss(a2, y)
grads1 = t.gradient(z2, [a1])  # dz2/da1, i.e. the relevant W1 entries
grads = t.gradient(l, [W0])    # dl/dW0 — should match the manual BP value
print(grads)
del t  # a persistent tape holds resources; release it explicitly when done