三层神经网络 (numpy实现)
文件目录
线性层
前向传播公式:
$$WX+b$$
代码
def forward(self, X):
    """Apply the layer's affine map to ``X``.

    ``X`` is matrix-multiplied by the weight matrix ``self.W`` and the bias
    ``self.b`` is added to the product; that sum is returned.
    """
    weighted = np.matmul(X, self.W)
    return weighted + self.b
反向传播公式
$$\frac{\partial f(WX+b)}{\partial W} = f'(WX+b) \cdot X$$
$$\frac{\partial f(WX+b)}{\partial b} = f'(WX+b)$$
$$\frac{\partial f(WX+b)}{\partial X} = f'(WX+b) \cdot W$$
第一个公式是 W 的梯度下降方向,第二个公式是 b 的梯度下降方向,第三个公式是该层向前一层传递的梯度。将其转化为矩阵形式,代码如下所示:
def backward(self, X, grad):
    """Back-propagate ``grad`` through the linear layer.

    Stores the weight gradient in ``self.W_grad`` and the bias gradient in
    ``self.b_grad``, then returns the gradient with respect to the layer
    input ``X``.

    Args:
        X: the input that was fed to ``forward`` (one sample per row).
        grad: gradient arriving from the following layer, one row per
            sample.

    Returns:
        ``grad`` multiplied by the transposed weight matrix.
    """
    # Weight gradient: contributions of all samples in the batch summed up.
    self.W_grad = np.matmul(X.T, grad)
    # Bias gradient: column sums of ``grad``.  ``keepdims=True`` keeps the
    # result a (1, n_out) row, the same shape the bias is created with,
    # instead of the flat (n_out,) vector the ones-vector product produced.
    self.b_grad = np.sum(grad, axis=0, keepdims=True)
    # Gradient handed back to the previous layer (derivative w.r.t. X).
    return np.matmul(grad, self.W.T)
该层除了前向传播和反向传播之外,还有参数更新
def update(self, lr):
    """Move the parameters one step along the stored gradients.

    ``self.W`` and ``self.b`` are rebound to new arrays equal to the old
    value plus ``lr`` times the matching stored gradient.  The step is
    *added*, so the stored gradients must already point in the direction
    the parameters should move.
    """
    weight_step = lr * self.W_grad
    bias_step = lr * self.b_grad
    self.W = self.W + weight_step
    self.b = self.b + bias_step
线性层的全部代码为
import numpy as np
class Linear:
    """Fully connected layer computing ``X @ W + b``.

    Attributes:
        W: weight matrix of shape ``(in_shape, out_shape)``.
        b: bias row of shape ``(1, out_shape)``.
        W_grad: gradient for ``W`` written by the last ``backward`` call.
        b_grad: gradient for ``b`` written by the last ``backward`` call.
    """

    def __init__(self, in_shape, out_shape, seed=10):
        """Create the layer with reproducible random parameters.

        Args:
            in_shape: number of input features.
            out_shape: number of output features.
            seed: seed of the private generator used for initialisation.
                The default reproduces the values of the former
                ``np.random.seed(10)`` call.
        """
        # A private generator gives the same numbers as seeding the global
        # one, without resetting NumPy's global random state for the caller.
        rng = np.random.RandomState(seed)
        # Weights must not be initialised to all zeros.
        self.W = rng.rand(in_shape, out_shape)
        self.b = rng.rand(1, out_shape)
        self.W_grad = np.zeros((in_shape, out_shape))
        self.b_grad = np.zeros((1, out_shape))

    def forward(self, X):
        """Return ``X`` multiplied by ``W`` plus the bias ``b``."""
        return np.matmul(X, self.W) + self.b

    def backward(self, X, grad):
        """Store the parameter gradients and return the input gradient.

        Args:
            X: the input that was fed to ``forward`` (one sample per row).
            grad: gradient arriving from the following layer.

        Returns:
            Gradient with respect to ``X``.
        """
        # Weight gradient: contributions of all samples summed up.
        self.W_grad = np.matmul(X.T, grad)
        # Column sums of ``grad``; keepdims keeps the (1, out_shape) shape
        # that ``b`` and the initial ``b_grad`` have.
        self.b_grad = np.sum(grad, axis=0, keepdims=True)
        # Derivative with respect to the layer input.
        return np.matmul(grad, self.W.T)

    def update(self, lr):
        """Add ``lr`` times the stored gradients to ``W`` and ``b``.

        The step is added, not subtracted, so callers must feed ``backward``
        a gradient that already points in the descent direction.
        """
        self.W = self.W + self.W_grad * lr
        self.b = self.b + self.b_grad * lr
sigmoid 层
前向传播公式
$$\frac{1}{1+e^{-x}}$$
代码
def forward(self, X):
    """Return the logistic sigmoid ``1 / (1 + exp(-X))`` element-wise.

    The value is computed from ``exp(-|X|)``, which lies in ``(0, 1]``, so
    large negative inputs no longer overflow inside ``np.exp``.

    Args:
        X: array-like of pre-activations.

    Returns:
        Array with the same shape as ``X``.
    """
    X = np.asarray(X)
    decay = np.exp(-np.abs(X))
    # For X >= 0 this is the textbook form; for X < 0 numerator and
    # denominator are both multiplied by exp(X), which is the same value.
    return np.where(X >= 0, 1 / (1 + decay), decay / (1 + decay))
反向传播公式
$$\frac{\partial f\left(\frac{1}{1+e^{-x}}\right)}{\partial x} = f'\left(\frac{1}{1+e^{-x}}\right)\left(\frac{1}{1+e^{-x}}\right)\left(1-\frac{1}{1+e^{-x}}\right)$$
代码
def backward(self, X, grad):
    """Back-propagate ``grad`` through the sigmoid.

    Args:
        X: the pre-activation that was fed to ``forward``.
        grad: gradient arriving from the following layer.

    Returns:
        ``s * (1 - s) * grad`` where ``s = self.forward(X)``.
    """
    # Evaluate the activation once instead of twice.
    activated = self.forward(X)
    return activated * (1 - activated) * grad
全部代码为
import numpy as np
class Sigmoid:
    """Element-wise logistic activation layer (no trainable parameters)."""

    def __init__(self):
        pass

    def forward(self, X):
        """Return ``1 / (1 + exp(-X))`` element-wise.

        Computed from ``exp(-|X|)``, which lies in ``(0, 1]``, so large
        negative inputs do not overflow inside ``np.exp``.
        """
        X = np.asarray(X)
        decay = np.exp(-np.abs(X))
        # For X < 0 the fraction is expanded by exp(X); same value, but the
        # exponent passed to exp is never positive.
        return np.where(X >= 0, 1 / (1 + decay), decay / (1 + decay))

    def backward(self, X, grad):
        """Return ``s * (1 - s) * grad`` with ``s = forward(X)``.

        Args:
            X: the pre-activation that was fed to ``forward``.
            grad: gradient arriving from the following layer.
        """
        # The activation is evaluated once and reused.
        activated = self.forward(X)
        return activated * (1 - activated) * grad
tanh 层
前向传播公式
$$\frac{e^x-e^{-x}}{e^x+e^{-x}}$$
代码
def forward(self, X):
    """Return the hyperbolic tangent of ``X`` element-wise.

    ``np.tanh`` is used instead of the explicit
    ``(e^x - e^-x) / (e^x + e^-x)`` quotient, which evaluates to
    ``inf / inf = nan`` once ``|X|`` is large enough for ``np.exp`` to
    overflow.
    """
    return np.tanh(X)
反向传播公式
$$\frac{\partial f\left(\frac{e^x-e^{-x}}{e^x+e^{-x}}\right)}{\partial x} = f'\left(\frac{e^x-e^{-x}}{e^x+e^{-x}}\right)\left(1-\left(\frac{e^x-e^{-x}}{e^x+e^{-x}}\right)^2\right)$$
代码
def backward(self, X, grad):
    """Back-propagate ``grad`` through the tanh activation.

    The incoming gradient is scaled element-wise by ``1 - t**2`` where
    ``t`` is ``self.forward(X)``.
    """
    activation = self.forward(X)
    local_slope = 1 - np.square(activation)
    return local_slope * grad
全部代码为:
import numpy as np
class Tanh:
    """Element-wise hyperbolic-tangent activation layer (no parameters)."""

    def __init__(self):
        pass

    def forward(self, X):
        """Return ``tanh(X)`` element-wise.

        ``np.tanh`` replaces the explicit exponential quotient, which turns
        into ``inf / inf = nan`` when ``|X|`` is large enough for ``np.exp``
        to overflow.
        """
        return np.tanh(X)

    def backward(self, X, grad):
        """Return ``(1 - tanh(X)**2) * grad``.

        Args:
            X: the pre-activation that was fed to ``forward``.
            grad: gradient arriving from the following layer.
        """
        return (1 - np.power(self.forward(X), 2)) * grad
搭建神经网络:
def model_bulider(self):
    """Instantiate the four layers of the network.

    Builds, in order, a linear layer sized by ``self.size1``, a tanh layer,
    a linear layer sized by ``self.size2`` and a sigmoid layer, and stores
    them as ``Linear1``, ``Tanh1``, ``Linear2`` and ``Sigmoid1``.
    """
    hidden_in, hidden_out = self.size1[0], self.size1[1]
    self.Linear1 = Linear.Linear(hidden_in, hidden_out)
    self.Tanh1 = Tanh.Tanh()
    output_in, output_out = self.size2[0], self.size2[1]
    self.Linear2 = Linear.Linear(output_in, output_out)
    self.Sigmoid1 = Sigmoid.Sigmoid()
损失函数采用的 MSE,损失和准确率的计算:
def MSEloss(self, X, Y):
    """Return half the summed squared error between ``predict(X)`` and ``Y``.

    Every squared residual is halved and the halves are summed over all
    elements; no averaging over samples takes place.
    """
    residual = self.predict(X) - Y
    halved_squares = np.square(residual) / 2
    return np.sum(halved_squares)
def acc(self, X, Y):
    """Return the fraction of rows whose predicted class matches ``Y``.

    The class of a row is the index of its largest entry, taken for both
    ``Y`` and ``self.predict(X)``; the number of matching rows is divided by
    the number of rows in ``X``.
    """
    target_class = np.argmax(Y, axis=1)
    predicted_class = np.argmax(self.predict(X), axis=1)
    hits = np.sum(target_class == predicted_class)
    return hits / X.shape[0]
batch_size 取值为 1 时就是单样本方式更新权重,否则就是采用批量方式更新权重。
# Train the model and unpack the four history lists returned by train().
train_loss, train_acc, val_loss, val_acc = model.train(
    train_X, train_Y, val_X, val_Y,
    epoch=epoch, batch_size=batch_size, show_epoch=show_epoch,
)
BP_Model.py
import numpy as np
import Linear
import Sigmoid
import Tanh
class BP_Model:
    """Small feed-forward network: Linear -> Tanh -> Linear -> Sigmoid.

    ``model_bulider`` must be called after construction to create the
    layers; ``train`` then fits them with mini-batch updates.
    """

    def __init__(self, size1, size2, lr):
        """Remember the layer sizes and the learning rate.

        Args:
            size1: ``[n_in, n_out]`` of the first linear layer.
            size2: ``[n_in, n_out]`` of the second linear layer.
            lr: step size passed to the linear layers' ``update``.
        """
        self.size1 = size1
        self.size2 = size2
        self.lr = lr

    def model_bulider(self):
        """Instantiate the four layers of the network."""
        self.Linear1 = Linear.Linear(self.size1[0], self.size1[1])
        self.Tanh1 = Tanh.Tanh()
        self.Linear2 = Linear.Linear(self.size2[0], self.size2[1])
        self.Sigmoid1 = Sigmoid.Sigmoid()

    def _forward(self, X):
        """Run the forward pass and return ``(a1, o1, a2, o2)``.

        ``a*`` are the linear-layer outputs, ``o*`` the activations; the
        intermediates are what ``update`` needs for back-propagation.
        """
        a1 = self.Linear1.forward(X)
        o1 = self.Tanh1.forward(a1)
        a2 = self.Linear2.forward(o1)
        o2 = self.Sigmoid1.forward(a2)
        return a1, o1, a2, o2

    def MSEloss(self, X, Y):
        """Return half the summed squared error of ``predict(X)`` vs ``Y``."""
        return np.sum(np.power(self.predict(X) - Y, 2) / 2)

    def acc(self, X, Y):
        """Return the fraction of rows whose arg-max class matches ``Y``."""
        count = np.sum(np.argmax(Y, axis=1) == np.argmax(self.predict(X), axis=1))
        return count / X.shape[0]

    def predict(self, X):
        """Return the network output (sigmoid activations) for ``X``."""
        return self._forward(X)[-1]

    def update(self, X, Y):
        """Perform one gradient step on the batch ``(X, Y)``."""
        o0 = X
        a1, o1, a2, o2 = self._forward(o0)
        # Back-propagation.  ``Y - o2`` is the *negative* derivative of
        # MSEloss with respect to the output, which is why the linear
        # layers add (rather than subtract) ``lr * gradient`` in update().
        grad = Y - o2
        grad = self.Sigmoid1.backward(a2, grad)
        grad = self.Linear2.backward(o1, grad)
        grad = self.Tanh1.backward(a1, grad)
        grad = self.Linear1.backward(o0, grad)
        # Parameter update.
        self.Linear1.update(self.lr)
        self.Linear2.update(self.lr)

    def train(self, X_train, Y_train, X_val, Y_val, epoch, batch_size, show_epoch):
        """Train with mini-batches and return the recorded history.

        Args:
            X_train, Y_train: training inputs and one-hot targets.
            X_val, Y_val: validation inputs and one-hot targets.
            epoch: maximum number of passes over the training set.
            batch_size: rows per update; ``1`` gives per-sample updates.
            show_epoch: metrics are printed and recorded every
                ``show_epoch`` epochs.

        Returns:
            ``(train_loss, train_acc, val_loss, val_acc)`` lists with one
            entry per recorded epoch.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        train_loss = []
        train_acc = []
        val_loss = []
        val_acc = []
        n_samples = X_train.shape[0]
        for i in range(epoch):
            # Stepping by batch_size also covers a final, shorter batch, so
            # no training rows are skipped when n_samples is not a multiple
            # of batch_size (or is smaller than it).
            for start in range(0, n_samples, batch_size):
                stop = start + batch_size
                self.update(X_train[start:stop, :], Y_train[start:stop, :])
            loss = self.MSEloss(X_train, Y_train)
            acc = self.acc(X_train, Y_train)
            loss_ = self.MSEloss(X_val, Y_val)
            acc_ = self.acc(X_val, Y_val)
            if i % show_epoch == 0:
                print('epoch=', i)
                print('loss={},acc={},val_loss={},val_acc={}'.format(loss, acc, loss_, acc_))
                train_loss.append(loss)
                train_acc.append(acc)
                val_acc.append(acc_)
                val_loss.append(loss_)
            # NOTE(review): the published listing lost its indentation; the
            # early stop is assumed to be checked every epoch - confirm.
            if loss <= 0.01:
                break
        return train_loss, train_acc, val_loss, val_acc
Main.py
import numpy as np
from matplotlib import pyplot as plt
import datetime
start_t = datetime.datetime.now()
import BP_Model

# Hyper-parameters of the experiment.
class_num = 3
hidden_num = 5
lr = 0.1
epoch = 5000
batch_size = 4
show_epoch = 100
train_rate = 0.8

# 30 three-dimensional samples; rows 0-9, 10-19 and 20-29 are labelled as
# classes 0, 1 and 2 respectively by the one-hot assignment below.
X = np.array([[1.58, 2.32, -5.8],
              [0.67, 1.58, -4.78],
              [1.04, 1.01, -3.63],
              [-1.49, 2.18, -3.39],
              [-0.41, 1.21, -4.73],
              [1.39, 3.16, 2.87],
              [1.20, 1.40, -1.89],
              [-0.92, 1.44, -3.22],
              [0.45, 1.33, -4.38],
              [-0.76, 0.84, -1.96],
              [0.21, 0.03, -2.21],
              [0.37, 0.28, -1.8],
              [0.18, 1.22, 0.16],
              [-0.24, 0.93, -1.01],
              [-1.18, 0.39, -0.39],
              [0.74, 0.96, -1.16],
              [-0.38, 1.94, -0.48],
              [0.02, 0.72, -0.17],
              [0.44, 1.31, -0.14],
              [0.46, 1.49, 0.68],
              [-1.54, 1.17, 0.64],
              [5.41, 3.45, -1.33],
              [1.55, 0.99, 2.69],
              [1.86, 3.19, 1.51],
              [1.68, 1.79, -0.87],
              [3.51, -0.22, -1.39],
              [1.40, -0.44, -0.92],
              [0.44, 0.83, 1.97],
              [0.25, 0.68, -0.99],
              [0.66, -0.45, 0.08]])

# One-hot targets.
Y = np.zeros([X.shape[0], class_num])
Y[0:10, 0] = 1
Y[10:20, 1] = 1
Y[20:30, 2] = 1

# Per-class split: the first int(train_rate * 10) rows of every 10-row class
# block go to the training set, the remaining rows to the validation set.
samples_per_class = 10
n_train = int(train_rate * samples_per_class)
class_starts = range(0, X.shape[0], samples_per_class)
train_X = np.concatenate([X[s:s + n_train, :] for s in class_starts], axis=0)
train_Y = np.concatenate([Y[s:s + n_train, :] for s in class_starts])
val_X = np.concatenate([X[s + n_train:s + samples_per_class, :] for s in class_starts])
val_Y = np.concatenate([Y[s + n_train:s + samples_per_class, :] for s in class_starts])

model = BP_Model.BP_Model(size1=[X.shape[1], hidden_num], size2=[hidden_num, Y.shape[1]], lr=lr)
model.model_bulider()
train_loss, train_acc, val_loss, val_acc = model.train(
    train_X, train_Y, val_X, val_Y,
    epoch=epoch, batch_size=batch_size, show_epoch=show_epoch,
)

# History entries are recorded every show_epoch epochs, hence the x spacing.
plt.figure()
plt.subplot(1, 2, 1)
plt.plot(range(0, len(train_loss) * show_epoch, show_epoch), train_loss, label='train loss')
plt.plot(range(0, len(val_loss) * show_epoch, show_epoch), val_loss, label='val loss')
plt.legend()
plt.title('Loss')
plt.subplot(1, 2, 2)
plt.plot(range(0, len(train_acc) * show_epoch, show_epoch), train_acc, label='train acc')
plt.plot(range(0, len(val_acc) * show_epoch, show_epoch), val_acc, label='val acc')
plt.legend()
plt.title('Acc')

# Stop the clock before plt.show(): show() can block until the window is
# closed, which would add viewing time to the measurement.
# total_seconds() is the whole duration, whereas .seconds is only the
# seconds component of the timedelta.
end_t = datetime.datetime.now()
print(int((end_t - start_t).total_seconds()), 's')
plt.show()
模型验证