The activation function is ReLU.
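For reference, ReLU (rectified linear unit) acts elementwise:

\mathrm{ReLU}(x) = \max(0, x)

which is exactly what np.maximum(h, 0) computes in the forward pass below.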
import numpy as np

N = 64        # number of samples
D_in = 1000   # input dimension
H = 100       # hidden-layer dimension
D_out = 10    # output dimension

# Randomly generate some training data
X = np.random.randn(N, D_in)
Y = np.random.randn(N, D_out)

# Randomly initialize the weights
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

# Set the learning rate
learning_rate = 1e-6
# Training loop
for t in range(500):
    # Forward pass
    h = X.dot(w1)              # shape (N, H)
    h_relu = np.maximum(h, 0)  # ReLU activation
    y_pred = h_relu.dot(w2)    # shape (N, D_out)

    # Compute the loss: sum of squared errors
    loss = np.square(y_pred - Y).sum()
    print(t, loss)

    # Backward pass: compute the gradients by hand
    grad_y_pred = 2.0 * (y_pred - Y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0          # ReLU passes the gradient only where h > 0
    grad_w1 = X.T.dot(grad_h)

    # Update the weights w1 and w2 (plain gradient descent)
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
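The backward pass above is just the chain rule applied to this two-layer network. Writing the forward pass as h = X w_1, h_relu = max(h, 0), \hat{y} = h_relu w_2 and the loss as L = \sum (\hat{y} - Y)^2, the gradients the code computes are:

\frac{\partial L}{\partial \hat{y}} = 2(\hat{y} - Y)

\frac{\partial L}{\partial w_2} = h_{\mathrm{relu}}^{\top}\,\frac{\partial L}{\partial \hat{y}}

\frac{\partial L}{\partial h_{\mathrm{relu}}} = \frac{\partial L}{\partial \hat{y}}\,w_2^{\top}

\frac{\partial L}{\partial h} = \frac{\partial L}{\partial h_{\mathrm{relu}}} \odot \mathbb{1}[h > 0]

\frac{\partial L}{\partial w_1} = X^{\top}\,\frac{\partial L}{\partial h}

where \odot denotes elementwise multiplication; the indicator \mathbb{1}[h > 0] corresponds to the line grad_h[h < 0] = 0.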
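If you want to convince yourself the hand-written gradients are correct, a standard trick is a centered finite-difference check. The sketch below is my own addition (the helper name numerical_grad_w2 and the eps value are assumptions, not part of the original example); it perturbs a single entry of w2 and compares the numerical slope of the loss against the analytic grad_w2.

def numerical_grad_w2(X, Y, w1, w2, i, j, eps=1e-4):
    # Centered finite difference of the loss w.r.t. w2[i, j]
    # (sketch; this helper is my own, not from the original example)
    def loss_at(w2_try):
        h_relu = np.maximum(X.dot(w1), 0)
        y_pred = h_relu.dot(w2_try)
        return np.square(y_pred - Y).sum()
    w2_plus = w2.copy()
    w2_plus[i, j] += eps
    w2_minus = w2.copy()
    w2_minus[i, j] -= eps
    return (loss_at(w2_plus) - loss_at(w2_minus)) / (2 * eps)

# Example: compare against the analytic gradient at the current weights
h_relu = np.maximum(X.dot(w1), 0)
grad_y_pred = 2.0 * (h_relu.dot(w2) - Y)
grad_w2 = h_relu.T.dot(grad_y_pred)
print(grad_w2[0, 0], numerical_grad_w2(X, Y, w1, w2, 0, 0))

The two numbers should agree to several significant digits; a large mismatch usually means a bug in the backward pass.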