Andrew Ng DeepLearning Part 2 Assignment, Week 1 (2): Regularization and Dropout

This part of the assignment demonstrates how regularization and dropout curb overfitting (the low-bias, high-variance regime).

Baseline, with no regularization. The network is a four-layer fully connected net (20, 7, 3, and 1 units) with ReLU hidden activations, a sigmoid output, and He initialization:

import numpy as np
import matplotlib.pyplot as plt
import reg_utils


def init(x):
    first_num = 20
    second_num = 7
    third_num = 3
    np.random.seed(5)
    # He initialization: scale each weight matrix by sqrt(2 / fan_in), suited to ReLU layers
    w1 = np.random.randn(first_num, x.shape[0]) * np.sqrt(2 / x.shape[0])
    b1 = np.zeros((first_num, 1))
    w2 = np.random.randn(second_num, first_num) * np.sqrt(2 / first_num)
    b2 = np.zeros((second_num, 1))
    w3 = np.random.randn(third_num, second_num) * np.sqrt(2 / second_num)
    b3 = np.zeros((third_num, 1))
    w4 = np.random.randn(1, third_num) * np.sqrt(2 / third_num)
    b4 = np.zeros((1, 1))
    ini_param = {
        "w1": w1,
        "b1": b1,
        "w2": w2,
        "b2": b2,
        "w3": w3,
        "b3": b3,
        "w4": w4,
        "b4": b4
    }
    return ini_param


# Forward propagation
def cal_z(w, a, b):  # a: (units in prev layer, m examples); w: (units in this layer, units in prev layer)
    return np.dot(w, a) + b


def cal_sigma(z):  # z: (units in this layer, m examples)
    return 1 / (1 + np.exp(-z))


def cal_relu(z):  # z: (units in this layer, m examples)
    return np.maximum(0, z)


def forward_f(x, p):
    z1 = cal_z(p["w1"], x, p["b1"])
    a1 = cal_relu(z1)
    z2 = cal_z(p["w2"], a1, p["b2"])
    a2 = cal_relu(z2)
    z3 = cal_z(p["w3"], a2, p["b3"])
    a3 = cal_relu(z3)
    z4 = cal_z(p["w4"], a3, p["b4"])
    a4 = cal_sigma(z4)
    forward_param = {
        "z1": z1,
        "a1": a1,
        "z2": z2,
        "a2": a2,
        "z3": z3,
        "a3": a3,
        "z4": z4,
        "a4": a4
    }
    return forward_param

def cost_f(a, y):  # a: (1, m); y: (1, m); cross-entropy cost
    m = y.shape[1]
    return -np.sum(y * np.log(a) + (1 - y) * np.log(1 - a)) / m

def cal_dz_last(a, y):  # a: (1, m); y: (1, m); dz for the sigmoid output layer
    return a - y


def cal_dw_db(dz, a, m):  # dz: (units in layer i, m); a: (units in layer i-1, m); w_i: (units in layer i, units in layer i-1)
    return np.dot(dz, a.T) / m, np.sum(dz, axis=1, keepdims=True) / m


def cal_da(dz, w):  # dz: (units in layer i, m); w: (units in layer i, units in layer i-1)
    return np.dot(w.T, dz)


def cal_drelu(da, z):  # da, z: (units in layer i, m); zero the gradient where z <= 0
    t = np.ones(z.shape)
    t[z <= 0] = 0
    return da * t


def back_f(p, f_p, x, y):
    dz4 = cal_dz_last(f_p["a4"], y)
    dw4, db4 = cal_dw_db(dz4, f_p["a3"], y.shape[1])

    da3 = cal_da(dz4, p["w4"])
    dz3 = cal_drelu(da3, f_p["z3"])
    dw3, db3 = cal_dw_db(dz3, f_p["a2"], y.shape[1])

    da2 = cal_da(dz3, p["w3"])
    dz2 = cal_drelu(da2, f_p["z2"])
    dw2, db2 = cal_dw_db(dz2, f_p["a1"], y.shape[1])

    da1 = cal_da(dz2, p["w2"])
    dz1 = cal_drelu(da1, f_p["z1"])
    dw1, db1 = cal_dw_db(dz1, x, y.shape[1])

    back_param = {
        "dw4": dw4,
        "db4": db4,
        "dw3": dw3,
        "db3": db3,
        "dw2": dw2,
        "db2": db2,
        "dw1": dw1,
        "db1": db1
    }
    return back_param


def update_p(p, b_p, learning_rate):
    upd_p = {
        "w1": p["w1"] - learning_rate * b_p["dw1"],
        "b1": p["b1"] - learning_rate * b_p["db1"],
        "w2": p["w2"] - learning_rate * b_p["dw2"],
        "b2": p["b2"] - learning_rate * b_p["db2"],
        "w3": p["w3"] - learning_rate * b_p["dw3"],
        "b3": p["b3"] - learning_rate * b_p["db3"],
        "w4": p["w4"] - learning_rate * b_p["dw4"],
        "b4": p["b4"] - learning_rate * b_p["db4"]
    }
    return upd_p


# 建模
def model(x, y, learning_rate, loop_num):
    p = init(x)
    cost = []
    for i in range(loop_num):
        f_p = forward_f(x, p)
        b_p = back_f(p, f_p, x, y)
        p = update_p(p, b_p, learning_rate)
        if i % 1000 == 0:
            cost.append(cost_f(f_p["a4"], y))
    return p, cost

def print_figure(x, y, final_p, _cost):
    x_min, x_max = x[0, :].min() - 1, x[0, :].max() + 1
    y_min, y_max = x[1, :].min() - 1, x[1, :].max() + 1
    h = 0.01
    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict the function value for the whole grid
    z = forward_f(np.vstack((xx.ravel(), yy.ravel())), final_p)
    z = np.round(z["a4"]).reshape(xx.shape)
    # Plot the contour and training examples
    plt.contourf(xx, yy, z, cmap=plt.cm.Spectral)
    plt.ylabel('x2')
    plt.xlabel('x1')
    plt.scatter(x[0, :], x[1, :], c=y.ravel(), cmap=plt.cm.Spectral)  # ravel: matplotlib expects a 1-D color array
    plt.figure()
    plt.plot(_cost)
    plt.show()

Load the data, then train and evaluate:

train_X, train_Y, test_X, test_Y = reg_utils.load_2D_dataset(is_plot=False)
print(train_X.shape)
print(train_Y.shape)
model_p, cost = model(train_X, train_Y, 0.23, 20000)
test_f_p = forward_f(test_X, model_p)
train_f_p = forward_f(train_X, model_p)
print("训练集准确度:", 100 * (1 - np.sum(np.abs(np.round(train_f_p["a4"]) - train_Y)) / train_Y.shape[1]), '%')
print("测试集准确度:", 100 * (1 - np.sum(np.abs(np.round(test_f_p["a4"]) - test_Y)) / test_Y.shape[1]), '%')
print_figure(test_X,test_Y,model_p,cost)

Train accuracy: 99.0521327014218 %
Test accuracy: 92.5 %

The decision-boundary plot shows some overfitting.

L2 regularization: an effective way to rein in overfitting.

With L2 regularization the forward pass is unchanged; the cost function gains one extra term, the sum of the squared 2-norms of the weight matrices, scaled by λ/(2m).
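Written out in the course's notation (this is exactly what cost_f_l2 below computes):

J = -\frac{1}{m}\sum_{i=1}^{m}\Big[y^{(i)}\log a^{[4](i)} + \big(1-y^{(i)}\big)\log\big(1-a^{[4](i)}\big)\Big] + \frac{\lambda}{2m}\sum_{l=1}^{4}\lVert W^{[l]}\rVert_F^2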

def cost_f_l2(a, y, lamd, p):  # a: (1, m); y: (1, m); p holds the weights w1..w4
    m = y.shape[1]
    s = np.sum(p["w1"] ** 2) + np.sum(p["w2"] ** 2) + np.sum(p["w3"] ** 2) + np.sum(p["w4"] ** 2)
    return -np.sum(y * np.log(a) + (1 - y) * np.log(1 - a)) / m + lamd * s / (2 * m)  # L2 regularization term

Accordingly, the backward pass must add the matching derivative term when differentiating with respect to each weight matrix w (the bias gradients are unchanged):
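Since the derivative of \frac{\lambda}{2m}\lVert W^{[l]}\rVert_F^2 with respect to W^{[l]} is \frac{\lambda}{m}W^{[l]}, each weight gradient becomes

dW^{[l]} = \frac{1}{m}\, dZ^{[l]} A^{[l-1]T} + \frac{\lambda}{m} W^{[l]}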

def cal_dw_db_l2(dz, a, m, lamd, w):  # dz: (units in layer i, m); a: (units in layer i-1, m); w: (units in layer i, units in layer i-1)
    return np.dot(dz, a.T) / m + lamd * w / m, np.sum(dz, axis=1, keepdims=True) / m

The backward pass and the model function both take the extra lambda parameter:

def back_f_l2(p, f_p, x, y, lamd):
    dz4 = cal_dz_last(f_p["a4"], y)
    dw4, db4 = cal_dw_db_l2(dz4, f_p["a3"], y.shape[1], lamd, p["w4"])

    da3 = cal_da(dz4, p["w4"])
    dz3 = cal_drelu(da3, f_p["z3"])
    dw3, db3 = cal_dw_db_l2(dz3, f_p["a2"], y.shape[1], lamd, p["w3"])

    da2 = cal_da(dz3, p["w3"])
    dz2 = cal_drelu(da2, f_p["z2"])
    dw2, db2 = cal_dw_db_l2(dz2, f_p["a1"], y.shape[1], lamd, p["w2"])

    da1 = cal_da(dz2, p["w2"])
    dz1 = cal_drelu(da1, f_p["z1"])
    dw1, db1 = cal_dw_db_l2(dz1, x, y.shape[1], lamd, p["w1"])

    back_param = {
        "dw4": dw4,
        "db4": db4,
        "dw3": dw3,
        "db3": db3,
        "dw2": dw2,
        "db2": db2,
        "dw1": dw1,
        "db1": db1
    }
    return back_param


def model_l2(x, y, learning_rate, loop_num, lamd):
    p = init(x)
    cost = []
    for i in range(loop_num):
        f_p = forward_f(x, p)
        b_p = back_f_l2(p, f_p, x, y, lamd)
        p = update_p(p, b_p, learning_rate)
        if i % 1000 == 0:
            cost.append(cost_f_l2(f_p["a4"], y, lamd, p))
    return p, cost

These functions drop in as replacements for the corresponding unregularized code. Load the data and train:

train_X, train_Y, test_X, test_Y = reg_utils.load_2D_dataset(is_plot=False)
print(train_X.shape)
print(train_Y.shape)
lamd = 1.2
model_p_l2, cost = model_l2(train_X, train_Y, 0.23, 20000, lamd)
test_f_p_l2 = forward_f(test_X, model_p_l2)
train_f_p_l2 = forward_f(train_X, model_p_l2)
print("Train accuracy:", 100 * (1 - np.sum(np.abs(np.round(train_f_p_l2["a4"]) - train_Y)) / train_Y.shape[1]), '%')
print("Test accuracy:", 100 * (1 - np.sum(np.abs(np.round(test_f_p_l2["a4"]) - test_Y)) / test_Y.shape[1]), '%')
print_figure(train_X, train_Y, model_p_l2, cost)

Train accuracy: 92.89099526066352 %
Test accuracy: 93.0 %

Training accuracy clearly drops while test accuracy rises: the overfitting is alleviated.
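A quick way to see why (a one-line check, assuming both models were trained in the same session; the L2 penalty should shrink the weights, and smaller weights give a smoother decision boundary):

print(np.linalg.norm(model_p["w1"]), np.linalg.norm(model_p_l2["w1"]))  # expect the L2-trained norm to be smaller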

Dropout: shutting off hidden-layer nodes by coin flip.

The CSDN post 【中文】【吴恩达课后编程作业】Course 2 - 改善深层神经网络 - 第一周作业(1&2&3) by 何宽 explains the theory in detail, but its code just calls the helper functions shipped with the assignment; I find hand-writing the code better for understanding.

What dropout changes:

The forward pass must randomly shut off nodes with the given keep probabilities and save the drop masks so the backward pass can reuse them:

def forward_f_dropout(x, p, drop_np):
    z1 = cal_z(p["w1"], x, p["b1"])
    a1 = cal_relu(z1)
    d1 = np.random.rand(a1.shape[0], a1.shape[1])
    d1 = d1 < drop_np[0]  # keep each node with probability drop_np[0]
    a1 = a1 * d1
    a1 = a1 / drop_np[0]  # inverted dropout: rescale so the expected activation is unchanged
    z2 = cal_z(p["w2"], a1, p["b2"])
    a2 = cal_relu(z2)
    d2 = np.random.rand(a2.shape[0], a2.shape[1])
    d2 = d2 < drop_np[1]
    a2 = a2 * d2
    a2 = a2 / drop_np[1]
    z3 = cal_z(p["w3"], a2, p["b3"])
    a3 = cal_relu(z3)
    d3 = np.random.rand(a3.shape[0], a3.shape[1])
    d3 = d3 < drop_np[2]
    a3 = a3 * d3
    a3 = a3 / drop_np[2]
    z4 = cal_z(p["w4"], a3, p["b4"])
    a4 = cal_sigma(z4)  # the output layer has a single node, so no dropout there
    drop_para = {
        "d1": d1,
        "d2": d2,
        "d3": d3
    }
    forward_param = {
        "z1": z1,
        "a1": a1,
        "z2": z2,
        "a2": a2,
        "z3": z3,
        "a3": a3,
        "z4": z4,
        "a4": a4
    }
    return drop_para, forward_param
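A quick sanity check of the inverted-dropout scaling (a standalone sketch, not part of the assignment): masking with keep probability p and then dividing by p leaves the mean activation roughly unchanged, which is why nothing extra is needed at test time.

import numpy as np

np.random.seed(0)
a = np.random.rand(7, 10000)              # fake activations: 7 units, 10000 examples
keep_prob = 0.8
d = np.random.rand(*a.shape) < keep_prob  # mask, as in forward_f_dropout
a_drop = a * d / keep_prob                # inverted dropout
print(a.mean(), a_drop.mean())            # both close to 0.5, even though 20% of entries are zeroed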

The cost function is unchanged. The backward pass applies the same masks and the same scaling as the forward pass, and the model function gains the keep-probability parameter:

def back_f_dropout(p, f_p, x, y, drop_para, drop_np):  # drop_para: saved random masks; drop_np: keep probabilities
    dz4 = cal_dz_last(f_p["a4"], y)
    dw4, db4 = cal_dw_db(dz4, f_p["a3"], y.shape[1])

    da3 = cal_da(dz4, p["w4"])
    da3 = da3 * drop_para["d3"]
    da3 = da3 / drop_np[2]
    dz3 = cal_drelu(da3, f_p["z3"])
    dw3, db3 = cal_dw_db(dz3, f_p["a2"], y.shape[1])

    da2 = cal_da(dz3, p["w3"])
    da2 = da2 * drop_para["d2"]
    da2 = da2 / drop_np[1]
    dz2 = cal_drelu(da2, f_p["z2"])
    dw2, db2 = cal_dw_db(dz2, f_p["a1"], y.shape[1])

    da1 = cal_da(dz2, p["w2"])
    da1 = da1 * drop_para["d1"]
    da1 = da1 / drop_np[0]
    dz1 = cal_drelu(da1, f_p["z1"])
    dw1, db1 = cal_dw_db(dz1, x, y.shape[1])

    back_param = {
        "dw4": dw4,
        "db4": db4,
        "dw3": dw3,
        "db3": db3,
        "dw2": dw2,
        "db2": db2,
        "dw1": dw1,
        "db1": db1
    }
    return back_param


def model_dropout(x, y, learning_rate, loop_num, drop_np):
    p = init(x)
    cost = []
    for i in range(loop_num):
        d_p, f_p = forward_f_dropout(x, p, drop_np)
        b_p = back_f_dropout(p, f_p, x, y, d_p, drop_np)
        p = update_p(p, b_p, learning_rate)
        if i % 1000 == 0:
            cost.append(cost_f(f_p["a4"], y))
    return p, cost

Load the data and train:

train_X, train_Y, test_X, test_Y = reg_utils.load_2D_dataset(is_plot=False)
print(train_X.shape)
print(train_Y.shape)
drop_prob = np.array([0.7, 0.8, 0.9])  # keep probabilities for hidden layers 1-3
model_p_dropout, cost = model_dropout(train_X, train_Y, 0.23, 20000, drop_prob)
test_f_p_dropout = forward_f(test_X, model_p_dropout)  # dropout must be disabled at test time
d_p, train_f_p_dropout = forward_f_dropout(train_X, model_p_dropout, drop_prob)
print("Train accuracy:", 100 * (1 - np.sum(np.abs(np.round(train_f_p_dropout["a4"]) - train_Y)) / train_Y.shape[1]), '%')
print("Test accuracy:", 100 * (1 - np.sum(np.abs(np.round(test_f_p_dropout["a4"]) - test_Y)) / test_Y.shape[1]), '%')
print_figure(test_X, test_Y, model_p_dropout, cost)

Train accuracy: 92.89099526066352 %
Test accuracy: 95.0 %

Test accuracy rises further.
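Note that the train-accuracy line above runs the dropout forward pass, so it measures a randomly thinned network; strictly, evaluation on the training set should also use the plain forward_f, e.g.:

train_eval = forward_f(train_X, model_p_dropout)  # full network, no masks, at evaluation time
print("Train accuracy (no dropout):", 100 * (1 - np.sum(np.abs(np.round(train_eval["a4"]) - train_Y)) / train_Y.shape[1]), '%')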
