Derivation and Implementation of a Simple Multi-Layer Neural Network

1. Forward Propagation
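
A minimal sketch of the forward pass, using the standard layer-wise notation (superscript $[l]$ for layer $l$, $A^{[0]} = X$ the input matrix with one sample per column); this is what the code in Section 5 computes:

$$Z^{[l]} = W^{[l]} A^{[l-1]} + b^{[l]}, \qquad A^{[l]} = g^{[l]}\!\left(Z^{[l]}\right), \qquad l = 1, \dots, L$$

where $g^{[l]}$ is relu for the hidden layers and sigmoid for the output layer, so the final activation $A^{[L]} = \hat{Y}$ lies in $(0, 1)$ and can be read as the probability of the positive class.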

2. Backpropagation

2.1 Three-Layer Network
2.2 Multi-Layer Network
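
For the general $L$-layer case, a sketch of the gradients used by the implementation in Section 5 (cross-entropy cost averaged over the $m$ training samples, sigmoid output, relu hidden layers, L2 penalty with coefficient $\lambda$):

$$dZ^{[L]} = \frac{A^{[L]} - Y}{m}$$
$$dW^{[l]} = dZ^{[l]} \left(A^{[l-1]}\right)^{T} + \frac{\lambda}{m} W^{[l]}, \qquad db^{[l]} = \sum_{\text{columns}} dZ^{[l]}$$
$$dA^{[l-1]} = \left(W^{[l]}\right)^{T} dZ^{[l]}, \qquad dZ^{[l-1]} = dA^{[l-1]} \odot \mathbf{1}\{Z^{[l-1]} > 0\}$$

The parameters are then updated by gradient descent: $W^{[l]} := W^{[l]} - \alpha\, dW^{[l]}$ and $b^{[l]} := b^{[l]} - \alpha\, db^{[l]}$.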

3. Regularization

3.1 L1 Regularization
3.2 L2 Regularization
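
A brief sketch of the two penalties under one common convention (the code in Section 5 implements only the L2 variant). Writing $J_0$ for the unregularized cross-entropy cost:

$$\text{L1:}\quad J = J_0 + \frac{\lambda}{m} \sum_{l} \left\|W^{[l]}\right\|_{1}, \qquad \text{which adds } \frac{\lambda}{m}\,\mathrm{sign}\!\left(W^{[l]}\right) \text{ to } dW^{[l]}$$
$$\text{L2:}\quad J = J_0 + \frac{\lambda}{2m} \sum_{l} \left\|W^{[l]}\right\|_{F}^{2}, \qquad \text{which adds } \frac{\lambda}{m}\,W^{[l]} \text{ to } dW^{[l]}$$

L1 tends to drive many weights exactly to zero (sparse solutions), while L2 shrinks all weights smoothly toward zero.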

4. Experimental Results

5. Python Implementation
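
The script below loads the cat/non-cat HDF5 dataset, flattens each image into a column vector scaled to [0, 1], and trains a fully connected network (hidden layers of 20, 7 and 5 relu units, one sigmoid output unit) by batch gradient descent on the L2-regularized cross-entropy cost, printing the cost every 100 iterations and the train/test error rates at the end.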

# -*- coding: utf-8 -*-
"""
Created on Fri Jan  3 14:29:06 2020

@author: fenghui
"""
import numpy as np
import matplotlib.pyplot as plt
import h5py
import time

# activation functions
def sigmoid(x):
    # numerically stable element-wise sigmoid: use exp(-x) for non-negative entries
    # and exp(x) for negative entries so that exp never overflows
    pos = x >= 0
    out = np.empty_like(x, dtype=float)
    out[pos] = 1.0/(1.0+np.exp(-x[pos]))
    out[~pos] = np.exp(x[~pos])/(1.0+np.exp(x[~pos]))
    return out
def tanh(x):
    return (np.exp(x)-np.exp(-x))/(np.exp(x)+np.exp(-x))
def relu(x):
    return np.maximum(0, x)   # element-wise max(0, x)

train_dataset = h5py.File('datasets/train_catvnoncat.h5', 'r')
train_set_x_orig = np.array( train_dataset['train_set_x'][:] )
train_set_y_orig = np.array( train_dataset['train_set_y'][:] )

test_dataset = h5py.File( 'datasets/test_catvnoncat.h5', 'r' )
test_set_x_orig = np.array( test_dataset['test_set_x'][:] )   # test set images
test_set_y_orig = np.array( test_dataset['test_set_y'][:] )   # test set labels

# reshape the labels into row vectors
train_set_y_orig = train_set_y_orig.reshape(1,-1)
test_set_y_orig = test_set_y_orig.reshape(1,-1)
#each sample is a column vector
train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T   
test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T      

train_set_x =train_set_x_flatten/255
test_set_x =test_set_x_flatten/255

# number of samples and input dimension
nTrain = train_set_x.shape[1]
nTest = test_set_x.shape[1]
n_x = train_set_x.shape[0]   # size of the input layer

#Hyper Parameters 
Iterations = 2000   # number of gradient descent iterations
alpha = 0.0075      # learning rate
Layers = [n_x, 20, 7, 5, 1]   # layer sizes: input, hidden layers, output
nL = len(Layers)-1            # number of weight layers (hidden + output)
lambd = 15                    # L2 regularization penalty coefficient
Y = train_set_y_orig
#initialize Parameters
W = [[] for i in range(len(Layers))]
b = [[] for i in range(len(Layers))]
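# Xavier-style scaling: divide the random weights by sqrt(fan-in) so early activations stay well-scaled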
for i in np.arange( nL )+1:
    W[i] =  np.random.randn(Layers[i], Layers[i-1])/Layers[i-1]**0.5
    b[i] =  np.zeros( (Layers[i], 1)) 
dW = W.copy()
db = b.copy()
#initialize Cache
Z = []
A = []    
for i in range(len(Layers)):
    A.append( np.zeros((Layers[i], nTrain)) )
    Z.append( np.zeros((Layers[i], nTrain)) )
A[0] = train_set_x
dZ = Z.copy()
dA = A.copy()
cost = []

tic = time.time()
for i in range(Iterations):
    #forward propagation
    for l in np.arange( nL )+1:
        Z[l] = np.dot(W[l], A[l-1])+b[l]   # linear output
        if l==nL:
            A[l] = sigmoid(Z[l])       # output layer uses sigmoid, giving values in (0, 1)
        else:
            A[l] = relu(Z[l])          # hidden layers use relu
    #backward propagation
    dZ[nL] = (A[nL]-Y)/nTrain  # dZ of the output layer (sigmoid + cross-entropy)
    for l in nL-np.arange(nL):                # gradients from layer nL down to layer 1
        dW[l] = np.dot(dZ[l], A[l-1].T) + (lambd/nTrain)*W[l]   # L2 penalty term added to dW
        db[l] = np.sum(dZ[l], axis=1, keepdims=True)
        if l>1:
            dA[l-1] = np.dot(W[l].T, dZ[l])  # dA and dZ of the previous layer
            dZ[l-1] = dA[l-1].copy()
            dZ[l-1][Z[l-1]<0] = 0            # relu derivative: zero where Z < 0
    for l in range(1,nL+1):
        W[l] = W[l]-alpha*dW[l]     # gradient descent parameter update
        b[l] = b[l]-alpha*db[l]
    
    if i%100==0:
        sum_W = 0
        for l in range(1,nL+1):
            sum_W += np.sum(W[l]**2)

        # L2-regularized cross-entropy cost
        current_cost = ( -np.sum( Y*np.log(A[nL])+(1-Y)*np.log(1-A[nL]) )/nTrain
                         + lambd/(2*nTrain)*sum_W )
        cost.append(current_cost)
        print( "Iterations:"+str(i)+"----"+"cost:"+str(current_cost) )
    
toc = time.time()
print("neural network running time is:"+str(toc-tic))
train_err_cnt = np.sum( A[nL][Y==1]<0.5 )+np.sum(A[nL][Y==0]>0.5)
print("train error rate is:"+str(train_err_cnt/nTrain))

# forward propagation on the test set: relu for hidden layers, sigmoid for the output layer
Yhat = test_set_x
Y = test_set_y_orig
for l in range(1, nL+1):
    Yhat = np.dot(W[l], Yhat)+b[l]
    Yhat = sigmoid(Yhat) if l == nL else relu(Yhat)

test_err_cnt = np.sum(Yhat[Y==1]<0.5)+np.sum(Yhat[Y==0]>0.5)
print( "test error rate is:"+str(test_err_cnt/nTest) )
plt.plot(cost)
plt.show()   
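
For completeness, a minimal sketch of classifying a single image with the trained parameters (the predict helper below is an illustration, not part of the original script; it assumes an unflattened image with the same shape as the training images):

def predict(x, W, b, nL):
    # illustrative helper, not in the original post
    a = x.reshape(-1, 1)/255                     # flatten to a column vector and scale, as in training
    for l in range(1, nL+1):
        z = np.dot(W[l], a) + b[l]
        a = sigmoid(z) if l == nL else relu(z)   # relu for hidden layers, sigmoid for the output layer
    return int(a[0, 0] > 0.5)                    # threshold the sigmoid output at 0.5

For example, predict(test_set_x_orig[0], W, b, nL) returns 1 if the first test image is classified as a cat.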