9 篇文章 0 订阅

# 神经网络(多层感知器)原理

## 预备知识

$O_j = f\left(\sum_{i=1}^{n} \omega_i x_i + b\right)$

sigmoid函数
$y = \frac{1}{1+e^{-x}}$
relu函数
$y = \max(0, x)$
tanh函数
$y = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$

$loss = \frac{1}{N}\cdot\frac{1}{2}\sum_{i=1}^{N}(y_i-\hat{y}_i)^2$

$loss = -\frac{1}{N}\sum_{i}\sum_{c} y_{ic}\log(p_{ic})$
$c$ 代表类别：当样本的真实类别为第 $c$ 类时 $y_{ic}$ 取 1，否则取 0；$p_{ic}$ 代表样本被预测为第 $c$ 类的概率，可由 softmax 函数求得。

## python实现

class My_bp_network():
    """Minimal BP (back-propagation) network with one hidden layer.

    Both the hidden and the output layer use the sigmoid activation.
    Weights are initialised uniformly in [-1, 1); biases start at zero.
    Training is plain per-sample gradient descent on the squared error
    0.5 * (y - yhat)^2.

    NOTE(review): `backward` only updates row 0 of `weight2` and `bias2`
    is hard-coded to shape (1, 1), so training is only correct for
    output_num == 1 — confirm before using a wider output layer.
    """

    def __init__(self, input_num, hidden_layer, output_num):
        """
        Parameters
        ----------
        input_num : int
            Number of input features.
        hidden_layer : int
            Number of hidden-layer nodes.
        output_num : int
            Output dimensionality (the backward pass assumes 1).
        """
        self.hidden_layer = hidden_layer
        self.input_num = input_num
        self.output_num = output_num
        # weight1: (hidden, input); weight2: (output, hidden)
        self.weight1 = np.random.uniform(-1, 1, (self.hidden_layer, self.input_num))
        self.bias1 = np.zeros((1, self.hidden_layer))
        self.weight2 = np.random.uniform(-1, 1, (self.output_num, self.hidden_layer))
        self.bias2 = np.zeros((1, 1))

    def sigmoid(self, x):
        """Element-wise logistic function 1 / (1 + exp(-x))."""
        y = 1 / (1 + np.exp(-x))
        return y

    def forward(self, x):
        """Run one forward pass and cache the intermediates for `backward`.

        Parameters
        ----------
        x : ndarray, shape (input_num,)
            A single sample's feature vector.

        Returns
        -------
        ndarray
            Sigmoid output of the final layer, shape (output_num, 1)
            (for the supported output_num == 1 case: (1, 1)).
        """
        self.input = x  # cached: backward needs the raw inputs
        # Hidden layer: (hidden, input) @ (input,) + (1, hidden) -> (1, hidden)
        self.hidden_layer_out = np.dot(self.weight1, x) + self.bias1
        self.hidden_layer_out = self.sigmoid(self.hidden_layer_out)  # activation
        # Output layer: (output, hidden) @ (hidden, 1) + (1, 1) -> (output, 1)
        self.final_layer_out = np.dot(self.weight2, self.hidden_layer_out.T) + self.bias2
        output = self.sigmoid(self.final_layer_out)
        return output

    def backward(self, lr, yhat, y):
        """One gradient-descent step for the sample last run through `forward`.

        Parameters
        ----------
        lr : float
            Learning rate.
        yhat : array-like
            Output returned by `forward` for that sample.
        y : float or array-like
            Ground-truth label.
        """
        dLdy = -(y - yhat)             # dL/dyhat of 0.5*(y-yhat)^2
        dydxfinal = yhat * (1 - yhat)  # sigmoid'(xfinal), with yhat = sigmoid(xfinal)
        dLdxfinal = dLdy * dydxfinal   # dL/d(sum(w2*h) + b2)
        # Collapse the (1,1) gradient to a Python float so the scalar
        # element updates below stay valid under NumPy's size-1-array
        # to-scalar deprecation.
        dLdxfinal = float(np.squeeze(dLdxfinal))
        self.bias2 -= lr * dLdxfinal   # d(xfinal)/db2 == 1
        for i in range(self.hidden_layer):  # hidden layer, node by node
            dxfinaldwi = self.hidden_layer_out[0][i]  # d(xfinal)/dw2_i
            # Read the output weight BEFORE updating it: the hidden-layer
            # gradients below must use the pre-update value.
            dxfinaldxi = self.weight2[0][i]           # d(xfinal)/dh_i
            self.weight2[0][i] -= lr * dLdxfinal * dxfinaldwi
            # sigmoid' at the hidden node's activation
            dxidhiddenout = self.hidden_layer_out[0][i] * (1 - self.hidden_layer_out[0][i])
            self.bias1[0][i] -= lr * dLdxfinal * dxfinaldxi * dxidhiddenout
            for j in range(self.input_num):  # input weights of node i
                dhiddenoutdwij = self.input[j]  # d(pre-activation_i)/dw1_ij
                self.weight1[i][j] -= lr * dLdxfinal * dxfinaldxi * dxidhiddenout * dhiddenoutdwij

• 0
点赞
• 0
评论
• 1
收藏
• 一键三连
• 扫一扫，分享海报

04-23

10-19 1252
07-07 5517
10-21 1万+
08-04
03-29 2479
02-21
03-25 7860