前言
本节学习多层感知机
基本可以算是最简单基础的神经网络
用到mxnet库和d2lzh库
1、原理
多层感知机
- 在单层神经网络的基础上引入了一到多个隐藏层(hidden layer)
- 隐藏层位于输入层和输出层之间
- 隐藏层和输出层都是全连接层
激活函数
- 全连接层只是对数据做仿射变换(affine transformation)
- 多个仿射变换的叠加仍然是一个仿射变换,因此只叠加全连接层并不能增强模型的表达能力
- 解决办法是对隐藏变量使用按元素运算的非线性函数进行变换
- 这个非线性函数被称为激活函数(activation function)
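ReLU函数:ReLU(x) = max(x, 0)
sigmoid函数:sigmoid(x) = 1 / (1 + exp(-x)),将元素变换到0和1之间
tanh函数:tanh(x) = (1 - exp(-2x)) / (1 + exp(-2x)),将元素变换到-1和1之间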
2、实现
import d2lzh as d2l
from mxnet import nd
from mxnet.gluon import loss as gloss
"""多层感知机的实现"""
# Data: Fashion-MNIST training and test iterators, loaded with batch size 256.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
# Model parameters.
# Fashion-MNIST images are 28 x 28 pixels (28 * 28 = 784 inputs) and there
# are 10 classes. The single hidden layer has 256 units.
num_inputs, num_outputs, num_hiddens = 784, 10, 256
# Weights are drawn from a normal distribution with scale 0.01; biases start at 0.
W1 = nd.random.normal(scale=0.01, shape=(num_inputs, num_hiddens))
b1 = nd.zeros(num_hiddens)
W2 = nd.random.normal(scale=0.01, shape=(num_hiddens, num_outputs))
b2 = nd.zeros(num_outputs)
params = [W1, b1, W2, b2]
# Attach a gradient buffer to every parameter so its gradient can be computed.
for param in params:
    param.attach_grad()
# ReLU activation function
def relu(X):
    """Return the element-wise maximum of X and 0."""
    floor = 0
    activated = nd.maximum(X, floor)
    return activated
# Neural network model
def net(X):
    """Forward pass of the one-hidden-layer perceptron.

    X is reshaped to rows of ``num_inputs`` features, passed through the
    hidden layer (W1, b1) with ReLU, then through the output layer (W2, b2).
    No softmax is applied here; the raw output-layer values are returned.
    """
    flat = X.reshape((-1, num_inputs))
    hidden_pre = nd.dot(flat, W1) + b1
    hidden = relu(hidden_pre)
    outputs = nd.dot(hidden, W2) + b2
    return outputs
# Loss function: softmax cross-entropy from Gluon.
loss = gloss.SoftmaxCrossEntropyLoss()
# Model training: 5 epochs with learning rate 0.5, updating the hand-built
# parameter list.
num_epochs = 5
lr = 0.5
d2l.train_ch3(
    net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)
简洁版
不得不说,现在各个深度学习框架都封装了一大批各种用途的函数
难怪有人说搞AI的都是无情的API调用机器和调参机器
import d2lzh as d2l
from mxnet import gluon, init
from mxnet.gluon import loss as gloss, nn
# Data: Fashion-MNIST training and test iterators with batch size 256.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
# Model: a 256-unit ReLU hidden layer followed by a 10-unit output layer.
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
# Initialize weights from a normal distribution with sigma 0.01.
net.initialize(init.Normal(sigma=0.01))
# Training: softmax cross-entropy loss, SGD trainer with learning rate 0.5,
# 5 epochs. The two None arguments leave the hand-written params/lr slots
# unused because a Gluon trainer is supplied instead.
loss = gloss.SoftmaxCrossEntropyLoss()
num_epochs = 5
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})
d2l.train_ch3(
    net, train_iter, test_iter, loss, num_epochs, batch_size,
    None, None, trainer)
3、丢弃法
使用丢弃法应对过拟合
- 由于在训练中隐藏层神经元的丢弃是随机的,即h1……h5都有可能被清零
- 输出层的计算无法过度依赖h1……h5中的任一个,从而在训练中起到正则化的作用
import d2lzh as d2l
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn
"""丢弃法"""
# Data: Fashion-MNIST training and test iterators with batch size 256.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)
# Dropout implemented from scratch
def dropout(X, drop_prob):
    """Randomly zero elements of X and rescale the survivors.

    Each element is kept when a uniform sample in [0, 1) is below the keep
    probability ``1 - drop_prob``; kept elements are divided by the keep
    probability.

    Args:
        X: Input array. It must provide ``shape`` and ``zeros_like()``;
            presumably an mxnet NDArray (confirm against callers).
        drop_prob: Probability of dropping an element, within [0, 1].

    Returns:
        ``X.zeros_like()`` when ``drop_prob`` is 1, otherwise the masked and
        rescaled array ``mask * X / keep_prob``.

    Raises:
        ValueError: If ``drop_prob`` lies outside [0, 1].
    """
    # Raise explicitly instead of using `assert`, which is stripped when
    # Python runs with -O and would let an invalid probability through.
    if not 0 <= drop_prob <= 1:
        raise ValueError(
            'drop_prob must be within [0, 1], got %r' % (drop_prob,))
    keep_prob = 1 - drop_prob
    # Every element is dropped; return early so the division below never
    # divides by zero.
    if keep_prob == 0:
        return X.zeros_like()
    mask = nd.random.uniform(0, 1, X.shape) < keep_prob
    return mask * X / keep_prob
# Model parameters: 784 inputs, two hidden layers of 256 units, 10 outputs.
num_inputs, num_outputs = 784, 10
num_hiddens1, num_hiddens2 = 256, 256
# Weights are drawn from a normal distribution with scale 0.01; biases start at 0.
W1 = nd.random.normal(scale=0.01, shape=(num_inputs, num_hiddens1))
b1 = nd.zeros(num_hiddens1)
W2 = nd.random.normal(scale=0.01, shape=(num_hiddens1, num_hiddens2))
b2 = nd.zeros(num_hiddens2)
W3 = nd.random.normal(scale=0.01, shape=(num_hiddens2, num_outputs))
b3 = nd.zeros(num_outputs)
params = [
    W1, b1,
    W2, b2,
    W3, b3,
]
# Attach a gradient buffer to every parameter.
for param in params:
    param.attach_grad()
# Neural network: two ReLU hidden layers, each followed by dropout.
drop_prob1, drop_prob2 = 0.2, 0.5
def net(X):
    """Forward pass of the two-hidden-layer perceptron with dropout.

    X is reshaped to rows of ``num_inputs`` features. Dropout is applied
    after each hidden layer only when autograd reports training mode; the
    raw output-layer values are returned.
    """
    # Dropout is used only while training the model.
    training = autograd.is_training()
    flat = X.reshape((-1, num_inputs))
    H1 = (nd.dot(flat, W1) + b1).relu()
    if training:
        # Dropout layer after the first fully connected layer.
        H1 = dropout(H1, drop_prob1)
    H2 = (nd.dot(H1, W2) + b2).relu()
    if training:
        # Dropout layer after the second fully connected layer.
        H2 = dropout(H2, drop_prob2)
    outputs = nd.dot(H2, W3) + b3
    return outputs
# Training: 5 epochs with learning rate 0.5.
# batch_size, train_iter and test_iter are reused from the data-loading step
# earlier in this script; re-assigning the same batch size (256) and loading
# the dataset a second time was redundant.
num_epochs, lr = 5, 0.5
loss = gloss.SoftmaxCrossEntropyLoss()
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params, lr)
简洁版
import d2lzh as d2l
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn
# Data: Fashion-MNIST training and test iterators with batch size 256.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
# Hyperparameters and loss. They are defined here, with the same values as
# the from-scratch dropout version, so this concise script runs on its own
# instead of relying on names left over from an earlier script.
drop_prob1, drop_prob2 = 0.2, 0.5
num_epochs, lr = 5, 0.5
loss = gloss.SoftmaxCrossEntropyLoss()
# Model
net = nn.Sequential()
net.add(nn.Dense(256, activation="relu"),
        nn.Dropout(drop_prob1),  # dropout after the first fully connected layer
        nn.Dense(256, activation="relu"),
        nn.Dropout(drop_prob2),  # dropout after the second fully connected layer
        nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))
# Training: a Gluon SGD trainer is supplied, so the hand-written params/lr
# slots of train_ch3 are passed as None.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None,
              None, trainer)
结语
通过最基础的多层感知机
熟悉了下mxnet和d2lzh这两个库
之后陆续学习CNN、RNN等深度学习常用网络