使用 2 层隐层的全连接网络：第一隐层使用 sigmoid 激活，第二隐层使用 ReLU 激活，输出层使用 sigmoid 激活。
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Toy binary-classification problem: four 2-bit inputs with XOR-style labels.
# NOTE: `input` shadows the Python builtin; the name is kept as-is because
# code outside this block may refer to it.
input = np.array([[0, 0], [0, 1], [1, 1], [1, 0]])  # shape (4, 2)
target = np.array([0, 1, 0, 1]).reshape(4, 1)  # shape (4, 1)
# Optional standardisation of the data, left disabled:
# input = (input - np.mean(input)) / np.std(input)
# target = (target - np.mean(target)) / np.std(target)

# Hyper-parameters.
epochs = 100000
lr = 0.01
num_node_1 = 3  # width of the first hidden layer
num_node_2 = 4  # width of the second hidden layer
number = 4  # initial weights are divided by sqrt(number)

# Parameters: Gaussian weights scaled by 1 / sqrt(number), zero biases.
W1 = np.random.randn(2, num_node_1) / np.sqrt(number)
W2 = np.random.randn(num_node_1, num_node_2) / np.sqrt(number)
W3 = np.random.randn(num_node_2, 1) / np.sqrt(number)
b1 = np.zeros((1, num_node_1))
b2 = np.zeros((1, num_node_2))
b3 = np.zeros((1, 1))

loss = []  # sum-of-squares error recorded at every epoch
result = []  # network output recorded at every epoch


def _sigmoid(x):
    """Return the element-wise logistic function 1 / (1 + exp(-x))."""
    return 1 / (np.exp(-1 * x) + 1)


for epoch in range(epochs):
    # Forward pass: sigmoid -> ReLU -> sigmoid.
    hidden1_ = np.dot(input, W1) + b1
    hidden1 = _sigmoid(hidden1_)
    hidden2_ = np.dot(hidden1, W2) + b2
    # Alternative (disabled): sigmoid on the second hidden layer.
    # hidden2 = _sigmoid(hidden2_)
    hidden2 = np.maximum(hidden2_, 0)
    output_ = np.dot(hidden2, W3) + b3
    output = _sigmoid(output_)
    result.append(output)

    # Loss: half the sum of squared errors over the whole batch.
    residual = output - target
    error = 0.5 * np.sum(residual * residual)
    loss.append(error)
    # Optional progress report, left disabled:
    # if epoch % 5000 == 0:
    #     print(error)
    #     print(np.round(output, 2))

    # Backward pass: propagate the loss gradient layer by layer.
    # Sigmoid derivative expressed through its output: s * (1 - s).
    dout = residual * (1 - output) * output
    dW3 = np.dot(hidden2.T, dout)
    db3 = np.sum(dout, axis=0)
    # The ReLU gate must be taken from the pre-activation: the post-ReLU
    # value is never negative, so testing it with `>= 0` would let the
    # gradient through units that were switched off in the forward pass.
    dhidden2 = np.dot(dout, W3.T) * (hidden2_ > 0)
    dW2 = np.dot(hidden1.T, dhidden2)
    db2 = np.sum(dhidden2, axis=0)
    dhidden1 = np.dot(dhidden2, W2.T) * (1 - hidden1) * hidden1
    dW1 = np.dot(input.T, dhidden1)
    db1 = np.sum(dhidden1, axis=0)

    # Gradient-descent update (in place, so the arrays keep their shapes).
    W3 -= lr * dW3
    b3 -= lr * db3
    W2 -= lr * dW2
    b2 -= lr * db2
    W1 -= lr * dW1
    b1 -= lr * db1

# Plot the training loss curve once training has finished.
plt.plot(loss)