Using Dropout to Prevent Neural-Network Overfitting (pure NumPy implementation)

1. Principle Overview

In brief, dropout randomly discards some neurons at each training iteration, so every update trains a sub-network built from only part of the neurons. Because any neuron may disappear, the model cannot rely too heavily on any single one; in practice this shrinks the weights W, giving an effect similar to L2 regularization. (A small numerical check of the rescaling in steps 2 and 3 follows after the steps below.)
Steps:
1-> Generate a random matrix with the same shape as A[l], with entries uniformly distributed in (0, 1); drop_prob is the keep probability (despite its name, it is the probability of keeping a neuron):

D[l] = np.random.rand(A[l].shape[0],A[l].shape[1])<drop_prob

2-> Drop some of the neurons (forward propagation):

A[l] = A[l]*D[l]
A[l] = A[l]/drop_prob  # keep the expected value unchanged (inverted dropout)

3-> Apply the same mask to the gradients (backward propagation):

dA[l] = dA[l]*D[l]
dA[l] = dA[l]/drop_prob  # keep the expected value unchanged
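
To see why dividing by drop_prob keeps the expected activations unchanged, here is a minimal numerical sketch (the names rng, keep_prob, acc, and n_trials are illustrative and not part of the implementation in section 3): averaging many dropout draws of A*D/keep_prob recovers A, since E[A*D/p] = A*p/p = A.

import numpy as np

rng = np.random.default_rng(0)
A = rng.random((5, 4))   # stand-in for one layer's activations A[l]
keep_prob = 0.6          # plays the role of drop_prob
n_trials = 20000

acc = np.zeros_like(A)
for _ in range(n_trials):
    D = rng.random(A.shape) < keep_prob  # step 1: random keep mask
    acc += A * D / keep_prob             # step 2: drop, then rescale
# the empirical mean approaches the original activations
print(np.abs(acc / n_trials - A).max())  # close to 0 (~0.01 at 20000 trials)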

2. Experimental Results

Without dropout (i.e. drop_prob = 1.0):

Training accuracy: 1.0
Test accuracy: 0.925

With dropout (drop_prob = 0.6):

Training accuracy: 0.919431279620853
Test accuracy: 0.95

These figures show that dropout markedly narrows the gap between training and test accuracy, i.e. it effectively suppresses overfitting.

3. Python Implementation

import numpy as np
import matplotlib.pyplot as plt
import nn_utils 
train_x,train_y,test_x,test_y = nn_utils.load_2D_dataset()  # load the data
nTrain = train_x.shape[1]
nTest = test_x.shape[1]
# Hyperparameters ------------------------------------
layers = [train_x.shape[0], 50, 35, 20, 10, 1]  # layer sizes (input ... output)
alpha = 0.02  # learning rate
drop_prob = 0.6   # keep probability for each hidden neuron
Iterations = 40000  # number of training iterations
nLayers = len(layers)-1  # number of weight layers
# Initialize W, b ------------------------------------
W = [[] for i in range(nLayers+1)]
b = [[] for i in range(nLayers+1)]
for l in range(1,nLayers+1):
    W[l] = np.random.randn(layers[l],layers[l-1])/(np.sqrt(layers[l-1]/2))  # He initialization: scale by sqrt(2/n_in)
    b[l] = np.zeros((layers[l],1))
dW = W.copy()
db = b.copy()
# Initialize caches ----------------------------------
A = [[] for i in range(nLayers+1)]
Z = [[] for i in range(nLayers+1)]
for l in range(1,nLayers+1):
    A[l] = np.zeros((layers[l],nTrain))
    Z[l] = np.zeros((layers[l],nTrain))
dA = A.copy()
dZ = Z.copy()
D = A.copy()  # dropout masks
A[0] = train_x
cost = []
# Training loop --------------------------------------
for i in range(Iterations):
    for l in range(1,nLayers+1):  # forward propagation
        Z[l] = np.dot(W[l],A[l-1])+b[l]  
        if l==nLayers:
            A[l] = nn_utils.sigmoid(Z[l])  
        else:
            A[l] = nn_utils.relu(Z[l])
            D[l] = np.random.rand(A[l].shape[0],A[l].shape[1])<drop_prob
            A[l] = A[l]*D[l]   # randomly drop some neurons, simplifying the network
            A[l] /= drop_prob  # inverted dropout: keep the expected activation unchanged
    dZ[nLayers] = (A[nLayers]-train_y)/nTrain  # output-layer gradient (sigmoid + cross-entropy)
    for l in range(nLayers,0,-1):  # backward propagation
        dW[l] = np.dot(dZ[l], A[l-1].T)
        db[l] = np.sum(dZ[l], axis=1, keepdims=True)
        if l>1:
            dA[l-1] = np.dot(W[l].T, dZ[l])*D[l-1]  # reuse the forward-pass dropout mask
            dA[l-1] /= drop_prob                    # same rescaling as the forward pass
            dZ[l-1] = dA[l-1].copy()
            dZ[l-1][Z[l-1]<0] = 0                   # ReLU derivative: zero gradient where Z < 0
    for l in range(1,nLayers+1):  # gradient-descent parameter update
        W[l] -= alpha*dW[l]
        b[l] -= alpha*db[l]
    if i%2000==0:
        # the 0.0001 offset guards against log(0)
        cost_cur = -np.sum( train_y*np.log(A[nLayers]+0.0001)+(1-train_y)*np.log(1-A[nLayers]+0.0001) )/nTrain
        cost.append(cost_cur)
        print("iteration: "+str(i)+" --- cost: "+str(cost_cur))

# Accuracy --------------------------------------------
train_err = np.sum(A[nLayers][train_y==1]<=0.5)+np.sum(A[nLayers][train_y==0]>=0.5)  # misclassified training samples
print("Training accuracy: "+str(1-train_err/nTrain))
predict_A = test_x
for l in range(1,nLayers+1):
    predict_Z = np.dot(W[l], predict_A)+b[l]
    if l==nLayers:
        predict_A = nn_utils.sigmoid(predict_Z)
    else:
        predict_A = nn_utils.relu(predict_Z)
test_err = np.sum(predict_A[test_y==1]<=0.5)+np.sum(predict_A[test_y==0]>=0.5)  # misclassified test samples
print("Test accuracy: "+str(1-test_err/nTest))
nn_utils.plot_decision_boundary(W, b, train_x, train_y)

nn_utils.py (helper module):

import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio

def relu(x):
    return np.maximum(x, 0)

def sigmoid(x):
    # Numerically stable sigmoid. The original branched on (x>0).all(), which
    # overflows when x mixes large positive and negative values; this version
    # is stable elementwise.
    z = np.exp(-np.abs(x))
    return np.where(x >= 0, 1.0/(1.0+z), z/(1.0+z))
def plot_decision_boundary(W, b, X, Y):
    x_min, x_max = X[0,:].min(), X[0,:].max()  # plotting range
    y_min, y_max = X[1,:].min(), X[1,:].max()
    step = 0.01   # grid resolution
    xx,yy = np.meshgrid( np.arange(x_min,x_max,step), np.arange(y_min,y_max,step) )  # build a grid over the input space
    plot_samples = np.array( [xx.ravel(),yy.ravel()] )
    A = plot_samples.copy()
    for l in range(1, len(W)):
        Z = np.dot(W[l], A)+b[l]
        if l==len(W)-1:
            A = sigmoid(Z)
        else:
            A = relu(Z)
    A[A>0.5] = 1
    A[A<=0.5] = 0
    A = A.reshape(xx.shape)
    plt.contourf(xx, yy, A, cmap=plt.cm.Spectral)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.scatter(X[0,:], X[1,:], c=Y[0,:])
    plt.show()
    
def load_2D_dataset(is_plot=True):
    data = sio.loadmat('data.mat')
    train_X = data['X'].T
    train_Y = data['y'].T
    test_X = data['Xval'].T
    test_Y = data['yval'].T
    if is_plot:
        plt.scatter(train_X[0, :], train_X[1, :], c=train_Y[0,:], s=40, cmap=plt.cm.Spectral)
    return train_X, train_Y, test_X, test_Y