I. Forward Propagation and Gradient Updates in a BP Neural Network
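Briefly, matching the implementation below (rows of $X$ are samples): the forward pass of a network with one tanh hidden layer and a sigmoid output layer is

$$Z_1 = X W_1 + b_1, \quad A_1 = \tanh(Z_1), \quad Z_2 = A_1 W_2 + b_2, \quad A_2 = \sigma(Z_2),$$

and training performs gradient descent on the mean squared error $J = \frac{1}{2m}\sum (A_2 - Y)^2$. Backpropagation gives the deltas

$$\delta_2 = (A_2 - Y) \odot A_2 \odot (1 - A_2), \qquad \delta_1 = (\delta_2 W_2^{\top}) \odot (1 - A_1^2),$$

from which the weight gradients are $A_1^{\top}\delta_2$ and $X^{\top}\delta_1$, and the bias gradients are the column sums of the deltas. The code below folds the minus sign of gradient descent into the deltas by computing $(Y - A_2)$ instead, so the parameters are later updated with "+".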
II. Program Design
1. Setup
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
learning_rate = 0.01

# convert integer class labels to one-hot encoding
def convert_to_one_hot(y, C):
    return np.eye(C)[y.reshape(-1)]

def sigmoid(x):
    return 1 / (1 + np.exp(-x))
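As a quick illustration (not part of the original script), convert_to_one_hot builds one-hot rows by indexing into an identity matrix:

convert_to_one_hot(np.array([0, 2, 1]), 3)
# array([[1., 0., 0.],
#        [0., 0., 1.],
#        [0., 1., 0.]])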
# data preparation
# each ROW of the input data is one sample; labels use one-hot encoding
iris = datasets.load_iris()
train_data, test_data, train_label, test_label = train_test_split(iris.data, iris.target, test_size=0.2, random_state=0)
train_label = convert_to_one_hot(train_label, 3)
test_label = convert_to_one_hot(test_label, 3)
print("X_train=" + str(train_data.shape))   # (120, 4)
print("X_test=" + str(test_data.shape))     # (30, 4)
print("Y_train=" + str(train_label.shape))  # (120, 3)
print("Y_test=" + str(test_label.shape))    # (30, 3)
# mean squared error averaged over the m training samples
def compute_cost(A2, Y):
    m = A2.shape[0]  # number of samples is the ROW count, not A2.shape[1]
    cost = np.sum(np.power(A2 - Y, 2)) / (2 * m)
    return float(np.squeeze(cost))
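A quick sanity check (illustrative values, not from the original): for a single sample predicted as [0.8, 0.1, 0.1] against the one-hot label [1, 0, 0], the cost is (0.2^2 + 0.1^2 + 0.1^2) / 2 = 0.03:

compute_cost(np.array([[0.8, 0.1, 0.1]]), np.array([[1.0, 0.0, 0.0]]))  # 0.03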
2. Initializing the Parameters
def initialize_parameters(n_x, n_h, n_y):
    np.random.seed(1)  # fixed seed so runs are reproducible
    # small random weights keep the tanh/sigmoid units out of saturation
    W1 = np.random.randn(n_x, n_h) * 0.01
    b1 = np.zeros(shape=(1, n_h))
    W2 = np.random.randn(n_h, n_y) * 0.01
    b2 = np.zeros(shape=(1, n_y))
    parameters = {"W1": W1, "b1": b1, "W2": W2, "b2": b2}
    return parameters
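The shapes follow the row-as-sample convention used throughout. A quick check, assuming the 4-40-3 architecture used later:

parameters = initialize_parameters(4, 40, 3)
for name, value in parameters.items():
    print(name, value.shape)
# W1 (4, 40), b1 (1, 40), W2 (40, 3), b2 (1, 3)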
3. Forward Propagation
def forward_propagation(X, parameters):
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    # hidden layer: tanh activation
    Z1 = np.dot(X, W1) + b1
    A1 = np.tanh(Z1)
    # output layer: sigmoid activation
    Z2 = np.dot(A1, W2) + b2
    A2 = sigmoid(Z2)
    # cache the intermediate values needed by backpropagation
    cache = {"Z1": Z1, "A1": A1, "Z2": Z2, "A2": A2}
    return (A2, cache)
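Tracing the shapes through one call (assuming the Iris training split from above):

A2, cache = forward_propagation(train_data, parameters)
# train_data (120, 4) -> Z1, A1 (120, 40) -> Z2, A2 (120, 3)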
4. Backpropagation
def backward_propagation(parameters, cache, X, Y):
    W2 = parameters["W2"]
    A1 = cache["A1"]
    A2 = cache["A2"]
    # output-layer delta: (Y - A2) carries the minus sign of gradient descent,
    # so all grads below are NEGATIVE cost gradients and are applied with "+";
    # A2*(1-A2) is the sigmoid derivative
    W2_delta = (Y - A2) * A2 * (1 - A2)
    dW2 = np.dot(A1.T, W2_delta)
    db2 = np.sum(W2_delta, axis=0, keepdims=True)  # sum the deltas over samples
    # hidden-layer delta: 1 - A1**2 is the tanh derivative
    W1_delta = np.dot(W2_delta, W2.T) * (1 - np.power(A1, 2))
    dW1 = np.dot(X.T, W1_delta)
    db1 = np.sum(W1_delta, axis=0, keepdims=True)
    grads = {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}
    return grads
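A finite-difference check is a standard way to validate these formulas. The sketch below (a hypothetical helper, not part of the original tutorial) perturbs one entry of W2. Because the code sums gradients over the m samples while compute_cost averages, and the deltas carry the sign of (Y - A2), the analytic value should match -m times the numerical derivative:

def numeric_dcost_dW2(parameters, X, Y, i, j, eps=1e-5):
    # central difference of the cost with respect to W2[i, j]
    p = {k: v.copy() for k, v in parameters.items()}
    p["W2"][i, j] += eps
    c_plus = compute_cost(forward_propagation(X, p)[0], Y)
    p["W2"][i, j] -= 2 * eps
    c_minus = compute_cost(forward_propagation(X, p)[0], Y)
    return (c_plus - c_minus) / (2 * eps)

A2, cache = forward_propagation(train_data, parameters)
grads = backward_propagation(parameters, cache, train_data, train_label)
m = train_data.shape[0]
num = numeric_dcost_dW2(parameters, train_data, train_label, 0, 0)
print(np.isclose(grads["dW2"][0, 0], -m * num))  # expected: True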
5. Updating the Weights
def update_parameters(parameters, grads, learning_rate):
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    dW1 = grads["dW1"]
    db1 = grads["db1"]
    dW2 = grads["dW2"]
    db2 = grads["db2"]
    # "+" performs gradient DESCENT here because the grads already carry the
    # minus sign from (Y - A2) in backward_propagation; the biases are updated
    # with the summed deltas db1/db2 so they keep their (1, n) shapes
    W1 = W1 + learning_rate * dW1
    b1 = b1 + learning_rate * db1
    W2 = W2 + learning_rate * dW2
    b2 = b2 + learning_rate * db2
    parameters = {"W1": W1, "b1": b1, "W2": W2, "b2": b2}
    return parameters
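Since the minus sign lives in the deltas, applying the grads with "+" moves the cost downhill. A quick check (illustrative, using only the functions defined so far):

params = initialize_parameters(4, 40, 3)
A2, cache = forward_propagation(train_data, params)
cost_before = compute_cost(A2, train_label)
grads = backward_propagation(params, cache, train_data, train_label)
params = update_parameters(params, grads, learning_rate)
cost_after = compute_cost(forward_propagation(train_data, params)[0], train_label)
print(cost_before, cost_after)  # cost_after is expected to be slightly lower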
6. Training the Model
parameters = initialize_parameters(4, 40, 3)  # 4 features, 40 hidden units, 3 classes
for i in range(1000):
    A2, cache = forward_propagation(train_data, parameters)
    cost = compute_cost(A2, train_label)
    grads = backward_propagation(parameters, cache, train_data, train_label)
    parameters = update_parameters(parameters, grads, learning_rate)
    print("cost in " + str(i) + "th loop is: " + str(cost))
7. Testing
# round the final activations to 0/1 and compare them with the one-hot labels;
# a sample predicted as the wrong class typically differs from its label in two
# positions, so halving the squared difference approximates the error count
predictions = np.around(A2)
accuracy = 1 - (np.sum(np.power(predictions - train_label, 2)) / 2) / train_data.shape[0]
print("your training accuracy is: " + str(accuracy))
print("your training cost is: " + str(cost))