前言
看完课程,根据记忆用numpy实现一遍深层神经网络,用上一篇实验数据验证实现算法是否正确。自己做的逻辑比较简单,实现了init_parameters、forward_propagation、back_propagation、update_parameters几个主要方法。hidden layer可选tanh和relu激活函数,输出层LR二分类。相比于Andrew Ng给出的jupyter代码实现课件,感觉自己实现的代码比较粗糙,只是实现了大块的功能,没有细分功能模块,代码复用性不高。
网络结构(课件截图)
激活函数σ=tanh(z),tanh求导公式 $\sigma'(z) = 1 - \tanh^2(z)$
激活函数σ=relu(z),relu求导公式(分段):$\sigma'(z) = \begin{cases} 1, & z > 0 \\ 0, & z \le 0 \end{cases}$
output layer激活函数σ=sigmoid(z),求导 $\sigma'(z) = \sigma(z)\,(1 - \sigma(z))$
input layer层(l=0,x1和x2,节点数2)
hidden layer(l=1,2,3,4,节点数分别为3,5,4,2)
output layer(l=5,节点数1)
主要模块实现
(1)正向传播
L:网络层数(输入,隐藏,输出)
l=0:input layer,输入数据
l=1~L-2:hidden layer,参数部分W1…W(L-2),b1…b(L-2)
l=L-1:output layer,参数W(L-1), b(L-1)
说明:hidden layer和output layer的激活函数不同,计算forward propagation时分成2部分处理:代码中for循环内部计算hidden layer,循环外部单独计算output layer。
(2)反向传播
输出层和隐层激活函数不同,反向传播分开计算,如下
测试输出结果
网络初始化参数,和上一篇浅层神经网络一样,得到结果也一样。layer_dims:网络结构。layer_dims[0]是input layer数据特征数;layer_dims[-1]是 output layer输出结果,二分类是0或1;其它是hidden layer,每层的节点数。
结果(隐层激活函数[各层节点数]=准确率)
下面是感觉很迷茫的结果。不知道为啥,relu多隐藏层后,总是如下图这样,哎!!!
代码如下
和上一篇浅层神经网络很像,主要修改了init_parameters、forward_propagation、back_propagation和update_parameters,添加了relu激活函数
"""
@Time : 2019/9/26 20:29 PM
@Author : bjjoy2009
"""
import matplotlib.pyplot as plt
import numpy as np
def sigmoid(Z):
    """Compute the logistic sigmoid 1 / (1 + exp(-Z)) element-wise.

    The exponential is only ever evaluated on non-positive arguments, so
    inputs of large magnitude do not overflow.

    :param Z: scalar or array of pre-activations
    :return: sigmoid of Z, with the same shape as Z and values in [0, 1]
    """
    Z = np.asarray(Z)
    # exp(-|Z|) always lies in [0, 1], so it cannot overflow.
    e = np.exp(-np.abs(Z))
    # Z >= 0: 1 / (1 + e^-Z);  Z < 0: e^Z / (1 + e^Z) -- the same function,
    # written so that both branches share the denominator (1 + e).
    A = np.where(Z >= 0, 1.0, e) / (1.0 + e)
    return A
def relu(Z):
    """Rectified linear unit: element-wise maximum of 0 and Z.

    :param Z: scalar or array of pre-activations
    :return: Z with every negative entry replaced by 0
    """
    return np.maximum(0, Z)
def sigmoid_loss(y, al):
    """Mean binary cross-entropy between labels and predicted probabilities.

    :param y: labels in {0, 1}, laid out with one example per column,
              e.g. shape (1, m)
    :param al: predicted probabilities, same layout as y
    :return: the loss averaged over the m examples; a (1, 1) array when the
             inputs have shape (1, m)
    """
    y = np.asarray(y, dtype=float)
    # Average over the number of examples (columns). len(y) would count
    # rows, which is 1 for a (1, m) label matrix.
    m = y.shape[-1]
    # Keep probabilities strictly inside (0, 1) so log() stays finite when
    # the sigmoid saturates to exactly 0 or 1.
    eps = 1e-15
    al = np.clip(al, eps, 1 - eps)
    result = -1/m * (np.dot(y, np.log(al).T) + np.dot(1-y, np.log(1-al).T))
    return result
def load_planar_dataset():
    """Generate the two-class "flower" toy dataset.

    NumPy's global random generator is re-seeded with 1 on every call, so
    the same arrays come back each time.

    :return: (X, Y); X has shape (2, 400) with one example per column, Y has
             shape (1, 400) with uint8 labels (0 for the first 200 columns,
             1 for the remaining 200)
    """
    np.random.seed(1)
    n_examples = 400
    per_class = int(n_examples / 2)
    n_features = 2
    max_ray = 4  # maximum ray of the flower
    points = np.zeros((n_examples, n_features))  # one row per example
    labels = np.zeros((n_examples, 1), dtype='uint8')  # 0 for red, 1 for blue
    for label in range(2):
        rows = slice(per_class * label, per_class * (label + 1))
        # The order of the two random draws (angle noise, then radius noise)
        # determines the generated values, so it must stay as is.
        theta = np.linspace(label * 3.12, (label + 1) * 3.12, per_class) + np.random.randn(per_class) * 0.2
        radius = max_ray * np.sin(4 * theta) + np.random.randn(per_class) * 0.2
        points[rows] = np.c_[radius * np.sin(theta), radius * np.cos(theta)]
        labels[rows] = label
    return points.T, labels.T
def plot_decision_boundary(model, X, y, title):
    """Draw a model's predictions over a dense grid with the data on top.

    :param model: callable taking an (n_points, 2) array of grid coordinates
                  and returning one prediction per grid point
    :param X: data whose rows 0 and 1 are the two plotted features
    :param y: labels used to colour the scattered data points
    :param title: title of the figure
    """
    step = 0.01  # distance between neighbouring grid points
    pad = 1  # margin left around the data on every side
    x1, x2 = X[0, :], X[1, :]
    x1_ticks = np.arange(x1.min() - pad, x1.max() + pad, step)
    x2_ticks = np.arange(x2.min() - pad, x2.max() + pad, step)
    xx, yy = np.meshgrid(x1_ticks, x2_ticks)
    # Evaluate the model on every grid point, then fold the flat result back
    # into the shape of the grid.
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    Z = model(grid_points).reshape(xx.shape)
    # Filled contour of the predictions, with the training examples on top.
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.ylabel('x2')
    plt.xlabel('x1')
    plt.scatter(x1, x2, c=y.ravel(), cmap=plt.cm.Spectral)
    plt.title(title)
    plt.show()
class DNN:
    """Fully connected binary classifier trained with batch gradient descent.

    All hidden layers share one activation ('relu' or 'tanh'); the output
    layer uses a sigmoid. Data is laid out with one example per column.
    """

    def __init__(self, X, Y, layer_dims, max_iter=10000, alpha=0.05, print_loss=False, activation='relu'):
        """
        :param X: training set, one example per column
        :param Y: labels, same column layout as the network output
        :param layer_dims: list of node counts per layer; layer_dims[0] is the
                           number of input features and layer_dims[-1] the
                           number of output nodes
        :param max_iter: number of gradient descent iterations run by fit()
        :param alpha: gradient descent learning rate
        :param print_loss: when True, fit() prints the loss every 1000 iterations
        :param activation: hidden layer activation, 'relu' or 'tanh'
        """
        self.X = X
        self.Y = Y
        self.layer_dims = layer_dims
        self.max_iter = max_iter
        self.alpha = alpha
        self.m = self.X.shape[1]  # number of training examples
        self.L = len(layer_dims)  # number of layers, input layer included
        self.print_loss = print_loss
        self.parameters = {}  # filled in by fit()
        self.activation = activation

    def init_parameters(self):
        """Create the initial weights and biases of layers 1..L-1.

        Weights are drawn from a zero-mean Gaussian scaled by the fan-in of
        the layer: sqrt(2 / n_prev) (He scaling) for relu hidden layers and
        sqrt(1 / n_prev) for everything else. Biases start at zero.

        :return: dict holding 'W<l>' and 'b<l>' for every layer l
        """
        hidden_gain = 2.0 if self.activation == 'relu' else 1.0
        parameters = {}
        for l in range(1, self.L):
            fan_in = self.layer_dims[l-1]
            # The last layer feeds a sigmoid, so it does not get the relu gain.
            gain = hidden_gain if l < self.L - 1 else 1.0
            # Uniform [0, 1) draws would make every weight positive and, with
            # a fixed 0.01 factor, shrink the signal layer after layer; a
            # zero-mean draw scaled by the fan-in avoids both.
            Wl = np.random.randn(self.layer_dims[l], fan_in) * np.sqrt(gain / fan_in)
            bl = np.zeros((self.layer_dims[l], 1))
            parameters['W' + str(l)] = Wl
            parameters['b' + str(l)] = bl
        return parameters

    def _hidden_activation(self, Z):
        """Apply the configured hidden layer activation to Z."""
        if self.activation == 'relu':
            return relu(Z)
        if self.activation == 'tanh':
            return np.tanh(Z)
        raise ValueError('unsupported activation: %r' % (self.activation,))

    def forward_propagation(self, parameters, X):
        """Run X through the network.

        :param parameters: dict of 'W<l>' / 'b<l>' arrays
        :param X: input data, one example per column
        :return: (cache, Al); cache maps 'A<l>' / 'Z<l>' to the activations
                 and pre-activations of every layer, Al is the sigmoid output
        :raises ValueError: if a hidden layer exists and self.activation is
                            neither 'relu' nor 'tanh'
        """
        # Cache the data actually being propagated, not the training set.
        cache = {'A0': X}
        A_pre = X
        # Hidden layers 1..L-2 use the configurable activation.
        for l in range(1, self.L-1):
            Wl = parameters['W' + str(l)]
            bl = parameters['b' + str(l)]
            Zl = np.dot(Wl, A_pre) + bl
            Al = self._hidden_activation(Zl)
            A_pre = Al
            cache['A' + str(l)] = Al
            cache['Z' + str(l)] = Zl
        # Output layer L-1 always uses the sigmoid.
        Wl = parameters['W' + str(self.L-1)]
        bl = parameters['b' + str(self.L-1)]
        Zl = np.dot(Wl, A_pre) + bl
        Al = sigmoid(Zl)
        cache['A' + str(self.L-1)] = Al
        cache['Z' + str(self.L-1)] = Zl
        return cache, Al

    def back_propagation(self, parameters, cache):
        """Compute the gradients of the loss for every layer.

        Uses the training labels self.Y, so the cache must come from a
        forward pass over the training set.

        :param parameters: dict of 'W<l>' / 'b<l>' arrays
        :param cache: activations and pre-activations from forward_propagation
        :return: dict holding 'dW<l>' and 'db<l>' for every layer l
        :raises ValueError: if a hidden layer exists and self.activation is
                            neither 'relu' nor 'tanh'
        """
        grads = {}
        # Output layer (layer L-1): sigmoid combined with cross-entropy
        # gives dZ = A - Y.
        L = self.L - 1
        Al = cache['A' + str(L)]
        dZl = Al - self.Y
        dWl = 1/self.m * np.dot(dZl, cache['A' + str(L-1)].T)
        dbl = 1/self.m * np.sum(dZl, axis=1, keepdims=True)
        grads['dW' + str(L)] = dWl
        grads['db' + str(L)] = dbl
        # Hidden layers, from layer L-2 down to layer 1.
        for l in reversed(range(1, L)):
            dAl = np.dot(parameters['W' + str(l+1)].T, dZl)
            if self.activation == 'relu':
                # relu'(z) is 1 for z > 0 and 0 otherwise.
                dZl = np.array(dAl, copy=True)
                dZl[cache['Z' + str(l)] <= 0] = 0
            elif self.activation == 'tanh':
                # tanh'(z) = 1 - tanh(z)^2, and A = tanh(Z).
                dZl = np.multiply(dAl, (1-np.power(cache['A' + str(l)], 2)))
            else:
                # Without this the dZl of the layer above would be reused and
                # silently produce gradients of the wrong shape.
                raise ValueError('unsupported activation: %r' % (self.activation,))
            dWl = 1/self.m * np.dot(dZl, cache['A' + str(l-1)].T)
            dbl = 1/self.m * np.sum(dZl, axis=1, keepdims=True)
            grads['dW' + str(l)] = dWl
            grads['db' + str(l)] = dbl
        return grads

    def update_parameters(self, parameters, grads):
        """Take one gradient descent step with learning rate self.alpha.

        :param parameters: dict of 'W<l>' / 'b<l>' arrays; updated in place
        :param grads: dict of 'dW<l>' / 'db<l>' arrays
        :return: the same parameters dict
        """
        for l in range(1, self.L):
            parameters['W' + str(l)] = parameters['W' + str(l)] - self.alpha * grads['dW' + str(l)]
            parameters['b' + str(l)] = parameters['b' + str(l)] - self.alpha * grads['db' + str(l)]
        return parameters

    def fit(self):
        """Train on self.X / self.Y for max_iter iterations.

        The learned parameters are stored in self.parameters.
        """
        parameters = self.init_parameters()
        for i in range(self.max_iter):
            cache, al = self.forward_propagation(parameters, self.X)
            if self.print_loss and i % 1000 == 0:
                loss = sigmoid_loss(self.Y, al)
                print(i, loss)
            grads = self.back_propagation(parameters, cache)
            parameters = self.update_parameters(parameters, grads)
        self.parameters = parameters

    def predict(self, X):
        """Classify X with the parameters learned by fit().

        :param X: input data, one example per column
        :return: boolean array, True where the output probability exceeds 0.5
        """
        cache, al = self.forward_propagation(self.parameters, X)
        predicts = (al > 0.5)
        return predicts
# Train a 2-8-8-1 relu network on the planar dataset, report the accuracy on
# the training set and plot the learned decision boundary.
np.random.seed(1)
X, Y = load_planar_dataset()
nn = DNN(X=X, Y=Y, layer_dims=[2, 8, 8, 1], alpha=0.5, print_loss=False, activation='relu')
nn.fit()
predicts = nn.predict(X)
# Percentage of predictions that match the labels. Comparing element-wise
# avoids accumulating counts in Y's uint8 dtype (which wraps past 255) and
# avoids calling float() on a (1, 1) array.
accuracy = float(np.mean(predicts == Y) * 100)
print('Accuracy: %f ' % accuracy+ '%')
plot_decision_boundary(lambda x: nn.predict(x.T), X, Y, 'relu[2881]='+ str(accuracy)+ '%')