import numpy as np
# Hyperparameters and synthetic data for a tiny two-layer network.
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10
# Create random input and target output data (standard-normal).
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)
# Randomly initialize weights for the two linear layers:
# w1 maps input -> hidden, w2 maps hidden -> output.
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)
learning_rate = 0.0002
def sigmod(x):
    """Element-wise logistic sigmoid: 1 / (1 + e^-x).

    Works on scalars and numpy arrays alike via broadcasting.
    (Name kept as-is — callers in this file use it.)
    """
    neg_exp = np.e ** -x
    return 1 / (1 + neg_exp)
def sigmod_(x):
    """Derivative of the sigmoid, s'(x) = s(x) * (1 - s(x)).

    Evaluates the sigmoid once and reuses the value — same result
    as calling it twice, since the function is deterministic.
    """
    s = sigmod(x)
    return s * (1 - s)
# Train with plain gradient descent for 2000 steps.
for t in range(2000):
    # Forward pass: linear -> sigmoid -> linear.
    h = x.dot(w1)          # pre-activation, shape (N, H)
    s = sigmod(h)          # hidden activation, shape (N, H)
    y_pred = s.dot(w2)     # prediction, shape (N, D_out)

    # Sum-of-squares loss.
    loss = np.square(y_pred - y).sum()
    print(t, loss)

    # Backward pass.
    # Gradient of the loss w.r.t. the prediction.
    grad_y_pred = 2.0 * (y_pred - y)
    # Gradient for the output layer's weights.
    grad_w2 = s.T.dot(grad_y_pred)
    # Backprop through the second linear layer to the activation.
    grad_s = grad_y_pred.dot(w2.T)
    # Chain rule through the sigmoid: multiply the upstream gradient
    # by the derivative evaluated at the PRE-ACTIVATION h.
    # (Bug fix: the original applied sigmod_ to the gradient itself,
    # i.e. grad_s = sigmod_(grad_s), which is not the chain rule.)
    grad_h = grad_s * sigmod_(h)
    # Gradient for the first layer's weights.
    grad_w1 = x.T.dot(grad_h)

    # Gradient-descent parameter update.
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2