前向传播推导过程
反向传播的推导过程以上述前向传播为基础,利用求导的链式法则得出,这里将所搭建的神经网络的代码放上。求完前向传播和反向传播之后,再利用梯度下降算法减小误差。代码中的每个参数都按照以上规则进行计算。每个神经元与下一层神经元之间的每条连接都有各自的权重值和偏置项。
import numpy as np
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*.

    The computation is built on ``np.exp``, so *x* may be a scalar or a
    NumPy array; arrays are processed element-wise.
    """
    return 1 / (1 + np.exp(-x))
def sigmoidD(x):
    """Return the sigmoid derivative expressed through the sigmoid output.

    *x* must already be a sigmoid activation ``a = sigmoid(z)``, not the
    pre-activation ``z``: the function evaluates ``a * (1 - a)``, which
    equals ``d sigmoid(z) / dz``.  Every call in this file passes an
    activation value.  Works element-wise on NumPy arrays.
    """
    return x * (1 - x)
# Training-data placeholders (empty until real samples are filled in).
# The training loop reads one value per sample from each array:
# three input features (x1s, x2s, x3s) and two targets (y1s, y2s).
x1s, x2s, x3s = (np.array([]) for _ in range(3))
y1s, y2s = (np.array([]) for _ in range(2))
# Parameter definitions
#
# Naming: w<j><k>_<l> / b<j><k>_<l> are the weight and bias attached to the
# connection from unit j of the previous layer to neuron k of layer l.
# Every value is drawn with np.random.rand(), in the same order as the
# names are listed below.

# Layer 1 (four neurons, three input connections each)
# Neuron 1
w11_1, b11_1, w21_1, b21_1, w31_1, b31_1 = [
    np.random.rand() for _ in range(6)
]
# Neuron 2
w12_1, b12_1, w22_1, b22_1, w32_1, b32_1 = [
    np.random.rand() for _ in range(6)
]
# Neuron 3
w13_1, b13_1, w23_1, b23_1, w33_1, b33_1 = [
    np.random.rand() for _ in range(6)
]
# Neuron 4
w14_1, b14_1, w24_1, b24_1, w34_1, b34_1 = [
    np.random.rand() for _ in range(6)
]

# Layer 2 (two neurons, four input connections each)
# Neuron 1
w11_2, b11_2, w21_2, b21_2, w31_2, b31_2, w41_2, b41_2 = [
    np.random.rand() for _ in range(8)
]
# Neuron 2
w12_2, b12_2, w22_2, b22_2, w32_2, b32_2, w42_2, b42_2 = [
    np.random.rand() for _ in range(8)
]
# Forward propagation
def forward_propgation(x1s, x2s, x3s):
    """Run one forward pass through the 3-input, 4-hidden, 2-output network.

    The weights and biases are read from the module-level ``w*_1``/``b*_1``
    (layer 1) and ``w*_2``/``b*_2`` (layer 2) variables, so the result
    reflects whatever values they hold at call time.

    Args:
        x1s, x2s, x3s: the three input features.  Only ``*`` and ``+`` are
            applied to them, so scalars and NumPy arrays of matching shape
            both work (arrays are processed element-wise).

    Returns:
        A 12-tuple, output layer first and each activation followed by its
        pre-activation:
        ``(a2_2, z2_2, a1_2, z1_2, a4_1, z4_1, a3_1, z3_1,
        a2_1, z2_1, a1_1, z1_1)``.
    """
    # Layer 1: every connection contributes its own weight*input + bias term.
    z1_1 = w11_1 * x1s + b11_1 + w21_1 * x2s + b21_1 + w31_1 * x3s + b31_1
    a1_1 = sigmoid(z1_1)
    z2_1 = w12_1 * x1s + b12_1 + w22_1 * x2s + b22_1 + w32_1 * x3s + b32_1
    a2_1 = sigmoid(z2_1)
    z3_1 = w13_1 * x1s + b13_1 + w23_1 * x2s + b23_1 + w33_1 * x3s + b33_1
    a3_1 = sigmoid(z3_1)
    z4_1 = w14_1 * x1s + b14_1 + w24_1 * x2s + b24_1 + w34_1 * x3s + b34_1
    a4_1 = sigmoid(z4_1)
    # Layer 2: fed by the four hidden activations.
    z1_2 = (w11_2 * a1_1 + b11_2 + w21_2 * a2_1 + b21_2
            + w31_2 * a3_1 + b31_2 + w41_2 * a4_1 + b41_2)
    a1_2 = sigmoid(z1_2)
    z2_2 = (w12_2 * a1_1 + b12_2 + w22_2 * a2_1 + b22_2
            + w32_2 * a3_1 + b32_2 + w42_2 * a4_1 + b42_2)
    a2_2 = sigmoid(z2_2)
    return (a2_2, z2_2, a1_2, z1_2, a4_1, z4_1, a3_1, z3_1,
            a2_1, z2_1, a1_1, z1_1)
# Forward pass over the complete input arrays with the initial parameters.
# The per-sample passes inside the training loop overwrite these names.
(a2_2, z2_2, a1_2, z1_2, a4_1, z4_1, a3_1, z3_1,
 a2_1, z2_1, a1_1, z1_1) = forward_propgation(x1s, x2s, x3s)

# Learning rate of the gradient-descent step (constant for the whole run).
alpha = 0.02

# Training: 5000 passes over the data, one parameter update per sample.
# For every sample all gradients are computed from the current parameters
# first, and only then are the parameters updated.
# The five data arrays are expected to have the same length; zip() stops at
# the shortest one.
for _ in range(5000):
    for x1, x2, x3, y1, y2 in zip(x1s, x2s, x3s, y1s, y2s):
        # Forward pass for this single sample.
        (a2_2, z2_2, a1_2, z1_2, a4_1, z4_1, a3_1, z3_1,
         a2_1, z2_1, a1_1, z1_1) = forward_propgation(x1, x2, x3)

        # Squared-error cost of each output.  Not used by the update below;
        # kept so the current error can be inspected.
        e1 = (y1 - a1_2) ** 2
        e2 = (y2 - a2_2) ** 2

        # ---- Back-propagation (chain rule) ------------------------------
        # Error at each output pre-activation:
        #   de/dz = de/da * da/dz = -2 * (y - a) * a * (1 - a)
        dedz1_2 = -2 * (y1 - a1_2) * sigmoidD(a1_2)
        dedz2_2 = -2 * (y2 - a2_2) * sigmoidD(a2_2)

        # Error at each hidden pre-activation.  Hidden neuron k feeds both
        # outputs, so the contributions of e1 and e2 are summed:
        #   de/dz_k = (de/dz1_2 * w_k1_2 + de/dz2_2 * w_k2_2) * da_k/dz_k
        dedz1_1 = (dedz1_2 * w11_2 + dedz2_2 * w12_2) * sigmoidD(a1_1)
        dedz2_1 = (dedz1_2 * w21_2 + dedz2_2 * w22_2) * sigmoidD(a2_1)
        dedz3_1 = (dedz1_2 * w31_2 + dedz2_2 * w32_2) * sigmoidD(a3_1)
        dedz4_1 = (dedz1_2 * w41_2 + dedz2_2 * w42_2) * sigmoidD(a4_1)

        # Layer-1 gradients: dz/dw is the connected input, dz/db is 1.
        # The three biases of a neuron are simply added into the same
        # pre-activation, so they all receive the same gradient.
        # Neuron 1
        dedw11_1 = dedz1_1 * x1
        dedb11_1 = dedz1_1
        dedw21_1 = dedz1_1 * x2
        dedb21_1 = dedz1_1
        dedw31_1 = dedz1_1 * x3
        dedb31_1 = dedz1_1
        # Neuron 2
        dedw12_1 = dedz2_1 * x1
        dedb12_1 = dedz2_1
        dedw22_1 = dedz2_1 * x2
        dedb22_1 = dedz2_1
        dedw32_1 = dedz2_1 * x3
        dedb32_1 = dedz2_1
        # Neuron 3
        dedw13_1 = dedz3_1 * x1
        dedb13_1 = dedz3_1
        dedw23_1 = dedz3_1 * x2
        dedb23_1 = dedz3_1
        dedw33_1 = dedz3_1 * x3
        dedb33_1 = dedz3_1
        # Neuron 4
        dedw14_1 = dedz4_1 * x1
        dedb14_1 = dedz4_1
        dedw24_1 = dedz4_1 * x2
        dedb24_1 = dedz4_1
        dedw34_1 = dedz4_1 * x3
        dedb34_1 = dedz4_1

        # Layer-2 gradients: dz/dw is the connected hidden activation.
        # Neuron 1 (only e1 depends on these parameters)
        dedw11_2 = dedz1_2 * a1_1
        dedb11_2 = dedz1_2
        dedw21_2 = dedz1_2 * a2_1
        dedb21_2 = dedz1_2
        dedw31_2 = dedz1_2 * a3_1
        dedb31_2 = dedz1_2
        dedw41_2 = dedz1_2 * a4_1
        dedb41_2 = dedz1_2
        # Neuron 2 (only e2 depends on these parameters)
        dedw12_2 = dedz2_2 * a1_1
        dedb12_2 = dedz2_2
        dedw22_2 = dedz2_2 * a2_1
        dedb22_2 = dedz2_2
        dedw32_2 = dedz2_2 * a3_1
        dedb32_2 = dedz2_2
        dedw42_2 = dedz2_2 * a4_1
        dedb42_2 = dedz2_2

        # ---- Gradient descent: each parameter steps against its own
        # gradient -------------------------------------------------------
        # Layer 1, neuron 1
        w11_1 = w11_1 - alpha * dedw11_1
        b11_1 = b11_1 - alpha * dedb11_1
        w21_1 = w21_1 - alpha * dedw21_1
        b21_1 = b21_1 - alpha * dedb21_1
        w31_1 = w31_1 - alpha * dedw31_1
        b31_1 = b31_1 - alpha * dedb31_1
        # Layer 1, neuron 2
        w12_1 = w12_1 - alpha * dedw12_1
        b12_1 = b12_1 - alpha * dedb12_1
        w22_1 = w22_1 - alpha * dedw22_1
        b22_1 = b22_1 - alpha * dedb22_1
        w32_1 = w32_1 - alpha * dedw32_1
        b32_1 = b32_1 - alpha * dedb32_1
        # Layer 1, neuron 3
        w13_1 = w13_1 - alpha * dedw13_1
        b13_1 = b13_1 - alpha * dedb13_1
        w23_1 = w23_1 - alpha * dedw23_1
        b23_1 = b23_1 - alpha * dedb23_1
        w33_1 = w33_1 - alpha * dedw33_1
        b33_1 = b33_1 - alpha * dedb33_1
        # Layer 1, neuron 4
        w14_1 = w14_1 - alpha * dedw14_1
        b14_1 = b14_1 - alpha * dedb14_1
        w24_1 = w24_1 - alpha * dedw24_1
        b24_1 = b24_1 - alpha * dedb24_1
        w34_1 = w34_1 - alpha * dedw34_1
        b34_1 = b34_1 - alpha * dedb34_1
        # Layer 2, neuron 1
        w11_2 = w11_2 - alpha * dedw11_2
        b11_2 = b11_2 - alpha * dedb11_2
        w21_2 = w21_2 - alpha * dedw21_2
        b21_2 = b21_2 - alpha * dedb21_2
        w31_2 = w31_2 - alpha * dedw31_2
        b31_2 = b31_2 - alpha * dedb31_2
        w41_2 = w41_2 - alpha * dedw41_2
        b41_2 = b41_2 - alpha * dedb41_2
        # Layer 2, neuron 2
        w12_2 = w12_2 - alpha * dedw12_2
        b12_2 = b12_2 - alpha * dedb12_2
        w22_2 = w22_2 - alpha * dedw22_2
        b22_2 = b22_2 - alpha * dedb22_2
        w32_2 = w32_2 - alpha * dedw32_2
        b32_2 = b32_2 - alpha * dedb32_2
        w42_2 = w42_2 - alpha * dedw42_2
        b42_2 = b42_2 - alpha * dedb42_2