1. Forward Propagation
2. Backpropagation
2.1 Three-Layer Network
2.2 Multi-Layer Network
3. Regularization
3.1 L1 Regularization
3.2 L2 Regularization
4. Experimental Results
5. Python Implementation
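For reference, these are the standard equations the code in Section 5 implements (forward/backward propagation with L2 regularization; note the code folds the 1/m factor into dZ^{[L]}, so its dW and db expressions omit it):

Forward, for l = 1, ..., L:
    $Z^{[l]} = W^{[l]} A^{[l-1]} + b^{[l]}, \quad A^{[l]} = g^{[l]}(Z^{[l]})$
with $g^{[l]} = \mathrm{ReLU}$ for hidden layers and $g^{[L]} = \sigma$ (sigmoid) for the output layer.

L2-regularized cross-entropy cost:
    $J = -\frac{1}{m}\sum_{i=1}^{m}\big[y^{(i)}\log a^{[L](i)} + (1-y^{(i)})\log(1-a^{[L](i)})\big] + \frac{\lambda}{2m}\sum_{l=1}^{L}\|W^{[l]}\|_F^2$

Backward, for l = L, ..., 1:
    $dZ^{[L]} = A^{[L]} - Y$
    $dW^{[l]} = \frac{1}{m}\,dZ^{[l]} A^{[l-1]\top} + \frac{\lambda}{m}W^{[l]}, \quad db^{[l]} = \frac{1}{m}\sum_i dZ^{[l](i)}$
    $dA^{[l-1]} = W^{[l]\top} dZ^{[l]}, \quad dZ^{[l-1]} = dA^{[l-1]} \odot g'^{[l-1]}(Z^{[l-1]})$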
"""
Created on Fri Jan 3 14:29:06 2020
@author: fenghui
"""
import numpy as np
import matplotlib.pyplot as plt
import h5py
import time
def sigmoid(x):
    # Numerically stable elementwise sigmoid: exp(-|x|) never overflows.
    # (The original branched on (x>0).all(), which applies one formula to the
    # whole array and can overflow on mixed-sign inputs.)
    e = np.exp(-np.abs(x))
    return np.where(x >= 0, 1.0/(1.0+e), e/(1.0+e))
def tanh(x):
    # Unused below; kept for reference. np.tanh avoids overflow for large |x|.
    return np.tanh(x)
def relu(x):
    return np.maximum(0, x)
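# For reference, the activation derivatives used in the backward pass
# (a sketch; the training loop below applies them inline rather than
# calling these helpers):
def relu_grad(z):
    # d/dz max(0, z): 1 where z > 0, else 0.
    return (z > 0).astype(float)
def sigmoid_grad(z):
    # d/dz sigmoid(z) = sigmoid(z) * (1 - sigmoid(z)).
    s = sigmoid(z)
    return s*(1.0-s)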
# Load the cat / non-cat dataset (HDF5 files).
train_dataset = h5py.File('datasets/train_catvnoncat.h5', 'r')
train_set_x_orig = np.array(train_dataset['train_set_x'][:])
train_set_y_orig = np.array(train_dataset['train_set_y'][:])
test_dataset = h5py.File('datasets/test_catvnoncat.h5', 'r')
test_set_x_orig = np.array(test_dataset['test_set_x'][:])
test_set_y_orig = np.array(test_dataset['test_set_y'][:])
# Labels as row vectors of shape (1, m).
train_set_y_orig = train_set_y_orig.reshape(1, -1)
test_set_y_orig = test_set_y_orig.reshape(1, -1)
# Flatten each image into a column vector and scale pixels to [0, 1].
train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
train_set_x = train_set_x_flatten/255
test_set_x = test_set_x_flatten/255
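# Optional sanity check: after flattening, X should be (features, examples)
# and Y should be (1, examples).
print(train_set_x.shape, train_set_y_orig.shape)  # e.g. (12288, m_train) for 64x64x3 images
print(test_set_x.shape, test_set_y_orig.shape)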
nTrain = train_set_x.shape[1]   # number of training examples
nTest = test_set_x.shape[1]     # number of test examples
n_x = train_set_x.shape[0]      # input dimension (num_px * num_px * 3)
Iterations = 2000               # gradient-descent iterations
alpha = 0.0075                  # learning rate
Layers = [n_x, 20, 7, 5, 1]     # layer sizes; Layers[0] is the input layer
nL = len(Layers)-1              # number of weight layers
lambd = 15                      # L2 regularization strength
Y = train_set_y_orig
# Parameter lists indexed 1..nL (index 0 is an unused placeholder).
W = [[] for i in range(len(Layers))]
b = [[] for i in range(len(Layers))]
for i in range(1, nL+1):
    # Xavier-style scaling: divide by sqrt(fan-in) to keep activations bounded.
    W[i] = np.random.randn(Layers[i], Layers[i-1])/Layers[i-1]**0.5
    b[i] = np.zeros((Layers[i], 1))
dW = W.copy()   # gradient containers with the same layout as W, b
db = b.copy()
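# Note: with ReLU hidden layers, He initialization
# (np.random.randn(...) * np.sqrt(2/Layers[i-1])) is a common alternative
# to the 1/sqrt(fan-in) scaling above.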
# Forward/backward caches: Z[l] are pre-activations, A[l] are activations.
Z = []
A = []
for i in range(len(Layers)):
    A.append(np.zeros((Layers[i], nTrain)))
    Z.append(np.zeros((Layers[i], nTrain)))
A[0] = train_set_x   # layer 0 "activation" is the input
dZ = Z.copy()
dA = A.copy()
cost = []
tic = time.time()
for i in range(Iterations):
    # Forward pass: ReLU for hidden layers, sigmoid for the output layer.
    for l in range(1, nL+1):
        Z[l] = np.dot(W[l], A[l-1])+b[l]
        if l == nL:
            A[l] = sigmoid(Z[l])
        else:
            A[l] = relu(Z[l])
    # Backward pass. For sigmoid + cross-entropy, dZ[nL] = (A[nL]-Y)/m.
    dZ[nL] = (A[nL]-Y)/nTrain
    for l in range(nL, 0, -1):
        # (lambd/nTrain)*W[l] is the gradient of the L2 penalty term.
        dW[l] = np.dot(dZ[l], A[l-1].T) + (lambd/nTrain)*W[l]
        db[l] = np.sum(dZ[l], axis=1, keepdims=True)
        if l > 1:
            dA[l-1] = np.dot(W[l].T, dZ[l])
            # ReLU derivative: zero out gradients where Z[l-1] < 0.
            dZ[l-1] = dA[l-1].copy()
            dZ[l-1][Z[l-1] < 0] = 0
    # Gradient-descent parameter update.
    for l in range(1, nL+1):
        W[l] = W[l]-alpha*dW[l]
        b[l] = b[l]-alpha*db[l]
    if i % 100 == 0:
        # Regularized cross-entropy cost: data term plus L2 weight penalty.
        # (The penalty must be inside the parentheses; as a bare "+..." line
        # it would be a no-op statement and silently dropped from the cost.)
        sum_W = 0
        for l in range(1, nL+1):
            sum_W += np.sum(W[l]**2)
        current_cost = (-np.sum(Y*np.log(A[nL])+(1-Y)*np.log(1-A[nL]))/nTrain
                        + lambd/(2*nTrain)*sum_W)
        cost.append(current_cost)
        print("Iterations:"+str(i)+"----"+"cost:"+str(current_cost))
toc = time.time()
print("neural network running time is:"+str(toc-tic))
# Training error: count misclassified examples at a 0.5 threshold.
train_err_cnt = np.sum(A[nL][Y==1] < 0.5)+np.sum(A[nL][Y==0] > 0.5)
print("train error rate is:"+str(train_err_cnt/nTrain))
# Test-set forward pass; the same activations must be applied as in training
# (the original omitted them, so Yhat was just an affine map of the input).
Yhat = test_set_x
Y = test_set_y_orig
for l in range(1, nL+1):
    Yhat = np.dot(W[l], Yhat)+b[l]
    Yhat = sigmoid(Yhat) if l == nL else relu(Yhat)
test_err_cnt = np.sum(Yhat[Y==1] < 0.5)+np.sum(Yhat[Y==0] > 0.5)
print("test error rate is:"+str(test_err_cnt/nTest))
plt.plot(cost)
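# Label the learning curve; the cost list holds one entry per 100 iterations.
plt.xlabel('iterations (hundreds)')
plt.ylabel('cost')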
plt.show()
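# A reusable inference helper (not in the original script; a sketch that
# repeats the forward pass above for arbitrary inputs X of shape (n_x, m)):
def predict(X, W, b, nL):
    a = X
    for l in range(1, nL+1):
        a = np.dot(W[l], a)+b[l]
        a = sigmoid(a) if l == nL else relu(a)
    return (a > 0.5).astype(int)   # 1 = cat, 0 = non-cat at a 0.5 threshold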