丢弃法——应对过拟合问题,并不改变输入的期望值
因为在每次训练的过程中,每一个隐藏单元都有一定的概率被丢弃,导致最终的训练结果无法完全依赖某一个或某几个隐藏单元,从而达到应对过拟合的目的
注:t.my_ones_packages是我自己根据《动手学深度学习》目前学习过程中出现的所有自定义函数进行集中得到的文档。
《动手学深度学习》也向读者提供了一个包含所有自定义函数的包“d2lzh”大家可以自行下载 侵删 链接如下 :link. 提取码: un5p 时间设置是永久的 如果失效可以联系我更新
丢弃法复杂实现
import t.my_ones_packages as mop
from mxnet import gluon, nd, autograd, init
from mxnet.gluon import nn, data as gdata, loss as gloss
def dropout(x, drop_prob):
    """Zero out each element of ``x`` with probability ``drop_prob``.

    Surviving elements are rescaled by ``1 / (1 - drop_prob)`` so the
    expectation of the output equals the input (inverted dropout).
    """
    assert 0 <= drop_prob <= 1  # program aborts if the probability is out of range
    keep_prob = 1 - drop_prob
    if keep_prob == 0:
        # Every element is dropped: return an all-zero array shaped like x
        # (nd.zeros_like(x) is equivalent to x.zeros_like()).
        return nd.zeros_like(x)
    # Bernoulli keep-mask: 1 where a uniform draw in [0, 1) falls below keep_prob.
    keep_mask = nd.random.uniform(0, 1, x.shape) < keep_prob
    return x * keep_mask / keep_prob
# Demo: drop probability 0 — every element is kept unchanged
'''
x = nd.arange(16).reshape((2, 8))
print(dropout(x, 0))
'''
# [[ 0. 1. 2. 3. 4. 5. 6. 7.]
# [ 8. 9. 10. 11. 12. 13. 14. 15.]]
# <NDArray 2x8 @cpu(0)>
# Demo: drop probability 0.5 — about half the elements are zeroed,
# survivors are doubled (divided by keep_prob = 0.5); exact zeros vary per run
'''
x = nd.arange(16).reshape((2, 8))
print(dropout(x, 0.5))
'''
# [[ 0. 2. 4. 6. 0. 0. 0. 14.]
# [ 0. 18. 0. 0. 24. 26. 28. 0.]]
# <NDArray 2x8 @cpu(0)>
# Demo: drop probability 1 — everything is zeroed
'''
x = nd.arange(16).reshape((2, 8))
print(dropout(x, 1))
'''
# [[0. 0. 0. 0. 0. 0. 0. 0.]
# [0. 0. 0. 0. 0. 0. 0. 0.]]
# <NDArray 2x8 @cpu(0)>
# Model parameters: 784 input pixels (28x28 Fashion-MNIST images),
# 10 output classes, two hidden layers of 256 units each.
num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256
W1 = nd.random.normal(scale=0.01, shape=(num_inputs, num_hiddens1))
b1 = nd.zeros(num_hiddens1)
W2 = nd.random.normal(scale=0.01, shape=(num_hiddens1, num_hiddens2))
b2 = nd.zeros(num_hiddens2)
W3 = nd.random.normal(scale=0.01, shape=(num_hiddens2, num_outputs))
b3 = nd.zeros(num_outputs)
params = [W1, b1, W2, b2, W3, b3]
# Attach gradient buffers so autograd can record gradients for each parameter.
for param in params :
    param.attach_grad()
# 定义模型
# Drop probabilities for the first and second hidden layers.
drop_prob1, drop_prob2 = 0.2, 0.5

def net(x):
    """Three-layer MLP for Fashion-MNIST with dropout after each hidden layer.

    Reshapes the input batch to (batch, num_inputs) and applies dropout only
    in training mode — autograd.is_training() returns True inside an
    autograd.record() scope, so evaluation runs without dropout.
    """
    x = x.reshape((-1, num_inputs))
    h1 = (nd.dot(x, W1) + b1).relu()
    if autograd.is_training():
        h1 = dropout(h1, drop_prob1)
    h2 = (nd.dot(h1, W2) + b2).relu()
    if autograd.is_training():
        # BUG FIX: the original wrote `h1 = dropout(h2, drop_prob2)`, assigning
        # the result to h1 and then using the un-dropped h2 below — the second
        # dropout layer was silently discarded. Assign back to h2.
        h2 = dropout(h2, drop_prob2)
    return nd.dot(h2, W3) + b3
num_epochs, lr, batch_size = 5, 0.5, 256
loss = gloss.SoftmaxCrossEntropyLoss()
train_iter, test_iter = mop.load_data_fashion_mnist(batch_size)
# BUG FIX: the original passed `param` — the leftover loop variable from the
# attach_grad loop, i.e. only the last bias b3 — so SGD would not update the
# full model. Pass the complete parameter list `params`.
mop.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)
# train_ch3 is implemented in the author's earlier post, "Deep Learning Notes (2)".
丢弃法的简单实现(Gluon实现)
import t.my_ones_packages as mop
from mxnet import autograd, nd, init, gluon
from mxnet.gluon import nn, loss as gloss, data as gdata
# Gluon version: declare the same network, with Dropout layers interleaved
# between the Dense layers. Dropout is active only in training mode.
drop_prob1, drop_prob2 = 0.2, 0.5
net = nn.Sequential()
# Equivalent to a single net.add(...) call listing all five layers.
# (Note: the dropout layers are nn.Dropout(drop_prob), NOT nn.Dense.)
net.add(nn.Dense(256, activation="relu"))
net.add(nn.Dropout(drop_prob1))
net.add(nn.Dense(256, activation="relu"))
net.add(nn.Dropout(drop_prob2))
net.add(nn.Dense(10))
# Initialize all weights from N(0, 0.01^2); biases default to zero.
net.initialize(init.Normal(sigma=0.01))
num_epochs, lr, batch_size = 5, 0.5, 256
loss = gloss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train_iter, test_iter = mop.load_data_fashion_mnist(batch_size)
# params/lr are None because the Gluon Trainer performs the updates.
mop.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, trainer)
# train_ch3 is implemented in the author's earlier post, "Deep Learning Notes (2)".