2017-11-23 22:41
依据上篇文章的推导,设l(n)层的第x个神经元为l(n)_(x),其误差值为delta_l(n)_(x),则有以下公式:
delta_l(n)_(x) = delta_l(n+1) * w(n+1) * f'(netl(n)_(x))
其中f'(netl(n)_(x))为神经元l(n)_(x)激活函数的导数,w(n+1)为l(n)层与l(n+1)层之间连接的权重矩阵。
对l(n)层网络的偏置b(n)求导,有以下公式:
d b(n)=sum( delta_l(n) )
其中sum为求和函数,把数组delta_l(n)内的元素全部相加。
依据此公式构建出一个6层的神经网络,其结构如下:
这么设计纯粹是为了好看,各层神经元数量可以自定义。由图也可以看出,当网络层数和神经元数量增多时,这种全连接式的神经网络结构会变得更加复杂,反而使网络效率降低,因此人们提出了卷积神经网络,本文暂不予讨论。
上图网络的实现代码如下:
'''*********************************************************************'''
import numpy as np
def f(x):
    """Logistic sigmoid activation, 1 / (1 + exp(-x)), without overflow.

    The textbook form calls exp(-x), which overflows (and emits a NumPy
    RuntimeWarning) once x is strongly negative. This version only ever
    exponentiates a non-positive number, so the intermediate stays in (0, 1].

    Args:
        x: A scalar or array-like of real numbers.

    Returns:
        The sigmoid of ``x``: a NumPy scalar for scalar input, otherwise an
        array of the same shape as ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) is in (0, 1] for every finite x, so neither branch can overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same function.
    out = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result back into a scalar and returns
    # higher-dimensional arrays unchanged.
    return out[()]
def fd(a):
    """Derivative of the sigmoid, written in terms of the sigmoid's output.

    Args:
        a: The activation value(s), i.e. the result of ``f(net)`` -- not the
            pre-activation ``net`` itself. Scalar or NumPy array.

    Returns:
        ``a * (1 - a)``, element-wise for arrays.
    """
    return a * (1 - a)
def run(m=100, alpha=0.9):
    """Train a 10-8-6-4-2-1 fully connected sigmoid network on one sample.

    The input vector, the target value (0.25), the random seed (1) and the
    initial biases are fixed inside the function. Every iteration performs a
    forward pass, back-propagates the squared error 0.5 * (y - out) ** 2,
    takes one gradient-descent step on all weights and biases, and prints the
    network output computed in that iteration's forward pass.

    Args:
        m: Number of training iterations.
        alpha: Learning rate used for every weight and bias update.
    """
    x = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
    y = 0.25

    # Fixed seed so every run starts from the same weights.
    np.random.seed(1)
    # w<k> has shape (neurons in layer k, neurons in the layer feeding it).
    w1 = np.random.random((8, 10))
    w2 = np.random.random((6, 8))
    w3 = np.random.random((4, 6))
    w4 = np.random.random((2, 4))
    w5 = np.random.random((1, 2))
    # One scalar bias per layer, shared by all neurons of that layer.
    b1, b2, b3, b4, b5 = 0.2, 0.3, 0.4, 0.5, 0.6

    for _ in range(m):
        # Forward pass. Each layer is a full matrix-vector product, so every
        # neuron receives ALL outputs of the previous layer (not just the one
        # output that happens to share its index).
        outl1 = f(np.dot(w1, x) + b1)
        outl2 = f(np.dot(w2, outl1) + b2)
        outl3 = f(np.dot(w3, outl2) + b3)
        outl4 = f(np.dot(w4, outl3) + b4)
        outy = f(np.dot(w5, outl4) + b5)

        # Backward pass: delta(n) = (w(n+1)^T . delta(n+1)) * f'(net(n)).
        # Every delta is a vector with one entry per neuron of its layer.
        delta_y = -(y - outy) * fd(outy)
        delta_l4 = np.dot(w5.T, delta_y) * fd(outl4)
        delta_l3 = np.dot(w4.T, delta_l4) * fd(outl3)
        delta_l2 = np.dot(w3.T, delta_l3) * fd(outl2)
        delta_l1 = np.dot(w2.T, delta_l2) * fd(outl1)

        # Gradient step. All deltas above were computed from the pre-update
        # weights, so the update order does not matter.
        # dE/dw[i, j] = delta[i] * input[j], i.e. an outer product.
        w1 -= alpha * np.outer(delta_l1, x)
        w2 -= alpha * np.outer(delta_l2, outl1)
        w3 -= alpha * np.outer(delta_l3, outl2)
        w4 -= alpha * np.outer(delta_l4, outl3)
        w5 -= alpha * np.outer(delta_y, outl4)
        # A layer's bias is shared by its neurons, so its gradient is the sum
        # of that layer's deltas.
        b1 -= alpha * np.sum(delta_l1)
        b2 -= alpha * np.sum(delta_l2)
        b3 -= alpha * np.sum(delta_l3)
        b4 -= alpha * np.sum(delta_l4)
        b5 -= alpha * np.sum(delta_y)

        print(outy)
# Run the training demo only when this file is executed as a script.
if __name__ == '__main__':
    run()
'''*******************************************************************'''