多层神经网络
对应的矩阵计算公式如下:
$W_h, b_h$ 表示第一层权重和偏移
$W_o, b_o$ 表示第二层权重和偏移
$H = XW_h + b_h$
$O = HW_o + b_o$
也就是将隐藏层的输出直接作为输出层的输入。如果将以上两个式子联立起来,可以得到
$O = (XW_h + b_h)W_o + b_o = XW_hW_o + b_hW_o + b_o$
激活函数
ReLU 激活函数
$ReLU(x) = \max(x, 0)$
import torch
import numpy as np
import matplotlib.pylab as plt
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l
def xyplot(x_vals, y_vals, name):
    """Plot y_vals against x_vals on a small (5 x 2.5 inch) figure.

    Both inputs are detached tensors so plotting works even when they
    require gradients; the y-axis is labelled ``name + '(x)'``.
    """
    d2l.set_figsize(figsize=(5, 2.5))
    xs = x_vals.detach().numpy()
    ys = y_vals.detach().numpy()
    d2l.plt.plot(xs, ys)
    d2l.plt.ylabel(name + '(x)')
    d2l.plt.xlabel('x')
# Sample ReLU on [-8, 8) in steps of 0.1; requires_grad=True so that the
# gradient can be plotted afterwards via backward().
x = torch.arange(-8.0, 8.0, 0.1, requires_grad=True)
y = torch.relu(x)
xyplot(x, y, 'relu')
函数图像
# Backpropagate through the scalar sum to populate x.grad, then plot it.
torch.sum(y).backward()
xyplot(x, x.grad, 'grad of relu')
梯度图
sigmoid 函数
$sigmoid(x) = \frac{1}{1 + \exp(-x)}$
梯度函数
$sigmoid'(x) = sigmoid(x)(1 - sigmoid(x))$
梯度图像
tanh函数
tanh(双曲正切)函数可以将元素的值变换到(-1)到1之间
$\tanh(x) = \frac{1 - \exp(-2x)}{1 + \exp(-2x)}$
梯度函数
$\tanh'(x) = 1 - \tanh^2(x)$
梯度图像
多层感知机
$H = \psi(XW_h + b_h)$
$O = HW_o + b_o$
$\psi$ 代表多层感知机的激活函数,我们可以在最后的输出上添加 SOFTMAX 等输出变换以及损失函数
多层感知机从零开始实现
import torch
import numpy as np
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l  # original omitted this import but used its helpers

batch_size = 256
# Original called load_data_fashion_mnist unqualified; it lives in d2l.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
# Fashion-MNIST images are 28x28 = 784 pixels; the output has 10 classes.
num_inputs, num_outputs, num_hiddens = 784, 10, 256
W1 = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_hiddens)), dtype=torch.float)
b1 = torch.zeros(num_hiddens, dtype=torch.float)
W2 = torch.tensor(np.random.normal(0, 0.01, (num_hiddens, num_outputs)), dtype=torch.float)
b2 = torch.zeros(num_outputs, dtype=torch.float)
params = [W1, b1, W2, b2]
# Original applied requires_grad_ to an undefined name `param`;
# it must be enabled on every parameter.
for param in params:
    param.requires_grad_(requires_grad=True)
# Define the activation function.
def relu(x):
    """ReLU activation: elementwise max(x, 0).

    The original body referenced an undefined global `X` instead of the
    parameter `x`.
    """
    return torch.max(input=x, other=torch.tensor(0.0))
# Model: flatten -> hidden linear layer + ReLU -> linear output layer.
def net(X):
    flat = X.view((-1, num_inputs))
    hidden = relu(torch.matmul(flat, W1) + b1)
    return torch.matmul(hidden, W2) + b2
# Loss: cross-entropy (log-softmax + NLL), averaged over the batch.
loss = torch.nn.CrossEntropyLoss()
# Train the model.
def train_ch3(net, train_iter, test_iter,
              loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    """Train `net` on `train_iter`, printing loss/accuracy each epoch.

    Args:
        net: callable mapping a batch X to per-class scores.
        train_iter, test_iter: iterators over (X, y) minibatches.
        loss: loss function applied as loss(net(X), y).
        num_epochs: number of passes over the training set.
        batch_size: minibatch size (used by the manual SGD step).
        params, lr: parameter list and learning rate for d2l.sgd, used
            only when `optimizer` is None.
        optimizer: a torch.optim optimizer; if given, it replaces the
            manual SGD step.
    """
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            # Zero gradients before backprop.  The `.grad is not None`
            # guard matters: on the very first iteration no gradient has
            # been computed yet.  (Original had the syntax error
            # `params in not None` and dereferenced grad unconditionally.)
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            l.backward()
            # Original: `if optimizer in None` (syntax error) and bare
            # `sgd` / `evaluate_accuracy`, which are d2l helpers.
            if optimizer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                optimizer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = d2l.evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
# Hyperparameters: 5 epochs; lr=100.0 (the manual SGD step divides by
# batch_size, so the effective step is much smaller).
num_epochs, lr = 5, 100.0
train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
          params=params, lr=lr)
多层感知机的简洁实现
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l
# Model dimensions: 784 input pixels (28x28), 10 classes, 256 hidden units.
# Original had the typo `num_outpus`, leaving `num_outputs` (used when
# building the network below) undefined.
num_inputs, num_outputs, num_hiddens = 784, 10, 256
class FlattenLayer(nn.Module):
    """Flatten every dimension except the batch: (N, *) -> (N, prod(*))."""

    def __init__(self):
        # Original line read `def __init__(self)L` — a typo for `:`.
        super(FlattenLayer, self).__init__()

    def forward(self, x):
        return x.view(x.shape[0], -1)
# Sequential model: flatten -> linear -> ReLU -> linear.
net = nn.Sequential(
    FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens),
    nn.ReLU(),
    nn.Linear(num_hiddens, num_outputs),
)
# Initialise every parameter from N(0, 0.01).
for param in net.parameters():
    init.normal_(param, mean=0, std=0.01)
# Load the data and train the model: SGD with lr=0.5 for 5 epochs.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

num_epochs = 5
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs,
              batch_size, None, None, optimizer)