Hand-Deriving MLP Backpropagation + Backpropagation Through an MLP with Dropout

This post works through a binary-classification MLP with a LeakyReLU activation and a softmax / cross-entropy loss, both by hand and with PyTorch autograd. It first lays out the network structure and the forward pass, then derives the backpropagation weight updates by hand and verifies the result with PyTorch. A Dropout layer is then added, the forward and backward passes are repeated, and the effect of Dropout on the weight updates is examined. The whole exercise illustrates how backpropagation is actually computed in a deep learning model.

Binary classification with an MLP (softmax regression), using a single sample (batch size 1) for one forward pass and one backward pass.

Hand-deriving MLP backpropagation

First, describe the network with mathematical formulas; the loss is the cross-entropy loss.
(Figure: mathematical description of the network and the cross-entropy loss)
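Since the figure is not reproduced here, the forward pass and loss can also be written out explicitly (a reconstruction from the PyTorch code further below, with $\alpha = 0.1$ the LeakyReLU negative slope and $c$ the index of the true class):

$$
\mathbf{z}^{(1)} = W^{(1)}\mathbf{x} + \mathbf{b}^{(1)}, \qquad
\mathbf{h} = \mathrm{LeakyReLU}\bigl(\mathbf{z}^{(1)}\bigr), \qquad
\mathrm{LeakyReLU}(z) = \begin{cases} z, & z > 0 \\ \alpha z, & z \le 0 \end{cases}
$$

$$
\mathbf{o} = W^{(2)}\mathbf{h} + \mathbf{b}^{(2)}, \qquad
\hat{\mathbf{y}} = \mathrm{softmax}(\mathbf{o}), \qquad
L = -\log \hat{y}_{c}
$$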
The forward pass. Note that an element such as 0.0675 falls on the $\mathrm{LeakyReLU}(x) = x$ branch of the activation; during backpropagation we differentiate the activation and multiply element-wise, so at the position of this 0.0675 element the derivative is $\mathrm{LeakyReLU}'(x) = 1$.
(Figure: hand-computed forward pass)
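As a sanity check on these numbers, here is a minimal NumPy sketch (weights and input taken from the PyTorch code further below) that reproduces the hand-computed forward values:

import numpy as np

x  = np.array([0.05, 0.10])
W1 = np.array([[0.15, 0.20], [0.25, 0.30], [0.35, 0.40]])
b1 = np.array([0.04, 0.08, 0.12])
W2 = np.array([[0.45, 0.50, 0.55], [0.60, 0.65, 0.70]])
b2 = np.array([0.16, 0.20])

def leaky_relu(z, alpha=0.1):
    return np.where(z > 0, z, alpha * z)

h     = leaky_relu(W1 @ x + b1)        # [0.0675, 0.1225, 0.1775]
o     = W2 @ h + b2                    # [0.349250, 0.444375]
y_hat = np.exp(o) / np.exp(o).sum()    # [0.476237, 0.523763]
loss  = -np.log(y_hat[1])              # 0.646715 (true class index is 1)
print(h, o, y_hat, loss)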
Because of the chain rule, the derivative can be computed piece by piece and the pieces multiplied at the end; pay attention to whether each quantity being differentiated is a scalar, a vector, or a matrix.
(Figure: chain-rule decomposition of the gradient)
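For reference, the pieces that the chain rule multiplies together are the standard softmax/cross-entropy and linear-layer gradients ($\mathbf{y}$ is the one-hot label, $\odot$ is element-wise multiplication):

$$
\frac{\partial L}{\partial \mathbf{o}} = \hat{\mathbf{y}} - \mathbf{y}, \qquad
\frac{\partial L}{\partial W^{(2)}} = \frac{\partial L}{\partial \mathbf{o}}\,\mathbf{h}^{\top}, \qquad
\frac{\partial L}{\partial \mathbf{b}^{(2)}} = \frac{\partial L}{\partial \mathbf{o}}
$$

$$
\frac{\partial L}{\partial \mathbf{h}} = {W^{(2)}}^{\top}\frac{\partial L}{\partial \mathbf{o}}, \qquad
\frac{\partial L}{\partial W^{(1)}} = \Bigl(\frac{\partial L}{\partial \mathbf{h}} \odot \mathrm{LeakyReLU}'\bigl(\mathbf{z}^{(1)}\bigr)\Bigr)\mathbf{x}^{\top}, \qquad
\frac{\partial L}{\partial \mathbf{b}^{(1)}} = \frac{\partial L}{\partial \mathbf{h}} \odot \mathrm{LeakyReLU}'\bigl(\mathbf{z}^{(1)}\bigr)
$$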
The backward pass, which mainly updates $W^{(1)}$ and $W^{(2)}$. Note that when the activation function is piecewise (e.g. LeakyReLU), you have to check whether the derivative in the highlighted (orange) part is $1$ or $\alpha$.
(Figure: hand-computed backward pass and weight updates, results in red boxes)
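That check amounts to evaluating the element-wise derivative of the activation at the pre-activation values; a minimal sketch (negative slope $\alpha = 0.1$, as in the code below):

import numpy as np

def leaky_relu_grad(z, alpha=0.1):
    # derivative of LeakyReLU: 1 where the pre-activation is positive, alpha otherwise
    return np.where(z > 0, 1.0, alpha)

# here all three pre-activations are positive, so every entry of the derivative is 1
print(leaky_relu_grad(np.array([0.0675, 0.1225, 0.1775])))  # [1. 1. 1.]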

Verification with PyTorch autograd

Compare the updated weights derived by hand in the red boxes above with those obtained from PyTorch autograd.

import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
torch.set_printoptions(precision=6)
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        
        self.w1 = nn.Linear(2,3)
        self.w1.weight.data = torch.Tensor([[0.15, 0.2], [0.25, 0.3], [0.35, 0.4]])
        self.w1.bias.data = torch.Tensor([0.04, 0.08, 0.12])
        
#         self.activate = nn.ReLU()
        self.activate = nn.LeakyReLU(negative_slope=0.1)
#         self.activate = nn.Sigmoid()
        
        self.w2 = nn.Linear(3, 2)  # hidden layer has 3 units, so in_features must be 3
        self.w2.weight.data = torch.Tensor([[0.45, 0.5, 0.55], [0.6, 0.65, 0.7]])
        self.w2.bias.data = torch.Tensor([0.16, 0.20])
        
        self.softmax = nn.Softmax(dim=0)
            
    def forward(self, x):
        h = self.activate(self.w1(x))
        o = self.w2(h)
        y = self.softmax(o)
        return h, o, y
    
# def init_weights(m):
#     if type(m) == nn.Linear:
#         nn.init.normal_(m.weight, std=0.01)
#         nn.init.zeros_(m.bias)

def unrandom(seed=2022):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    
unrandom()
net = MLP()
# net.apply(init_weights)
optimizer = optim.SGD(net.parameters(), lr=0.1)
loss = nn.CrossEntropyLoss()
# cross-entropy here uses the natural logarithm (base e)
unrandom()
x = torch.tensor([0.05, 0.10], requires_grad=True)
y = torch.tensor([1], requires_grad=False)
print(x, y)
	tensor([0.050000, 0.100000], requires_grad=True) tensor([1])
h, o, y_pred = net(x)
o.unsqueeze_(0)  # add a batch dimension so CrossEntropyLoss sees logits of shape (1, 2)
print(h, o, y_pred)
	tensor([0.067500, 0.122500, 0.177500], grad_fn=<LeakyReluBackward0>) 	tensor([[0.349250, 0.444375]], grad_fn=<UnsqueezeBackward1>) tensor([0.476237, 0.523763], grad_fn=<SoftmaxBackward>)
l = loss(o, y)
l
	tensor(0.646715, grad_fn=<NllLossBackward>)
print(net.state_dict())
optimizer.zero_grad()
l.backward(retain_graph=True)
    OrderedDict([('w1.weight', tensor([[0.150000, 0.200000],
            [0.250000, 0.300000],
            [0.350000, 0.400000]])), ('w1.bias', tensor([0.040000, 0.080000, 0.120000])), ('w2.weight', tensor([[0.450000, 0.500000, 0.550000],
            [0.600000, 0.650000, 0.700000]])), ('w2.bias', tensor([0.160000, 0.200000]))])
optimizer.step()
print(net.state_dict())
    OrderedDict([('w1.weight', tensor([[0.150357, 0.200714],
            [0.250357, 0.300714],
            [0.350357, 0.400714]])), ('w1.bias', tensor([0.047144, 0.087144, 0.127144])), ('w2.weight', tensor([[0.446785, 0.494166, 0.541547],
            [0.603215, 0.655834, 0.708453]])), ('w2.bias', tensor([0.112376, 0.247624]))])
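The same numbers can be reproduced by hand. The following minimal NumPy sketch applies the gradient formulas above with learning rate 0.1 and prints exactly the parameters shown after optimizer.step():

import numpy as np

x  = np.array([0.05, 0.10])
W1 = np.array([[0.15, 0.20], [0.25, 0.30], [0.35, 0.40]])
b1 = np.array([0.04, 0.08, 0.12])
W2 = np.array([[0.45, 0.50, 0.55], [0.60, 0.65, 0.70]])
b2 = np.array([0.16, 0.20])
lr, alpha = 0.1, 0.1

z1 = W1 @ x + b1
h  = np.where(z1 > 0, z1, alpha * z1)
o  = W2 @ h + b2
y_hat = np.exp(o) / np.exp(o).sum()

grad_o  = y_hat - np.array([0.0, 1.0])           # dL/do = y_hat - one_hot(y), true class = 1
grad_W2 = np.outer(grad_o, h)
grad_b2 = grad_o
grad_h  = W2.T @ grad_o
grad_z1 = grad_h * np.where(z1 > 0, 1.0, alpha)  # LeakyReLU'(z1) is 1 everywhere here
grad_W1 = np.outer(grad_z1, x)
grad_b1 = grad_z1

print(W1 - lr * grad_W1)   # [[0.150357, 0.200714], [0.250357, 0.300714], [0.350357, 0.400714]]
print(b1 - lr * grad_b1)   # [0.047144, 0.087144, 0.127144]
print(W2 - lr * grad_W2)   # [[0.446785, 0.494166, 0.541547], [0.603215, 0.655834, 0.708453]]
print(b2 - lr * grad_b2)   # [0.112376, 0.247624]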

MLP backpropagation with dropout

Mathematical description and forward pass of the MLP with dropout. The weights and other parameters are the same as above; the only difference is the added dropout layer.
(Figure: network with dropout, mathematical description and forward pass)
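One detail worth spelling out: nn.Dropout implements inverted dropout, i.e. during training each element is zeroed with probability p and the survivors are scaled by 1/(1 - p), here 1/(1 - 0.75) = 4. With the mask sampled in the run below (the third hidden unit is dropped), h = [0.0675, 0.1225, 0.1775] becomes d = [0.27, 0.49, 0]; a minimal sketch:

import numpy as np

h    = np.array([0.0675, 0.1225, 0.1775])
mask = np.array([1.0, 1.0, 0.0])   # mask sampled in the run below: third unit dropped
p    = 0.75
d    = h * mask / (1 - p)          # survivors scaled by 1/(1 - p) = 4  ->  [0.27, 0.49, 0.]
print(d)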
The backward pass:
(Figure: hand-computed backward pass with dropout)
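In the backward pass, dropout is the same mask-and-scale operation: the gradient only flows through the kept units, so the row of $W^{(1)}$ and the column of $W^{(2)}$ attached to the dropped unit receive no update (the PyTorch result below confirms this):

$$
\frac{\partial L}{\partial \mathbf{h}} = \frac{\partial L}{\partial \mathbf{d}} \odot \frac{\mathbf{m}}{1 - p}, \qquad \mathbf{m} \in \{0,1\}^{3} \text{ the sampled dropout mask.}
$$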

Verification with PyTorch autograd

import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
torch.set_printoptions(precision=6)
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        
        self.w1 = nn.Linear(2,3)
        self.w1.weight.data = torch.Tensor([[0.15, 0.2], [0.25, 0.3], [0.35, 0.4]])
        self.w1.bias.data = torch.Tensor([0.04, 0.08, 0.12])
        
        self.activate = nn.LeakyReLU(negative_slope=0.1)
        self.dropout = nn.Dropout(p=0.75)
        
        self.w2 = nn.Linear(3, 2)  # hidden layer has 3 units, so in_features must be 3
        self.w2.weight.data = torch.Tensor([[0.45, 0.5, 0.55], [0.6, 0.65, 0.7]])
        self.w2.bias.data = torch.Tensor([0.16, 0.20])
        
        self.softmax = nn.Softmax(dim=0)
            
    def forward(self, x):
        h = self.activate(self.w1(x))
        d = self.dropout(h)
        o = self.w2(d)
        y = self.softmax(o)
        return h, d, o, y
    
# def init_weights(m):
#     if type(m) == nn.Linear:
#         nn.init.normal_(m.weight, std=0.01)
#         nn.init.zeros_(m.bias)

def unrandom(seed=2022):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    
unrandom()
net = MLP()
# net.apply(init_weights)
optimizer = optim.SGD(net.parameters(), lr=0.1)
loss = nn.CrossEntropyLoss()
# cross-entropy here uses the natural logarithm (base e)
unrandom()
x = torch.tensor([0.05, 0.10], requires_grad=True)
y = torch.tensor([1], requires_grad=False)
print(x, y)
    tensor([0.050000, 0.100000], requires_grad=True) tensor([1])
h, d, o, y_pred = net(x)
o.unsqueeze_(0)  # add a batch dimension so CrossEntropyLoss sees logits of shape (1, 2)
print(h, d, o, y_pred)
    tensor([0.067500, 0.122500, 0.177500], grad_fn=<LeakyReluBackward0>) tensor([0.270000, 0.490000, 0.000000], grad_fn=<MulBackward0>) tensor([[0.526500, 0.680500]], grad_fn=<UnsqueezeBackward1>) tensor([0.461576, 0.538424], grad_fn=<SoftmaxBackward>)
l = loss(o, y)
l
    tensor(0.619109, grad_fn=<NllLossBackward>)
net
    MLP(
      (w1): Linear(in_features=2, out_features=3, bias=True)
      (activate): LeakyReLU(negative_slope=0.1)
      (dropout): Dropout(p=0.75, inplace=False)
      (w2): Linear(in_features=2, out_features=2, bias=True)
      (softmax): Softmax(dim=0)
    )
print(net.state_dict())
optimizer.zero_grad()
l.backward(retain_graph=True)
    OrderedDict([('w1.weight', tensor([[0.150000, 0.200000],
            [0.250000, 0.300000],
            [0.350000, 0.400000]])), ('w1.bias', tensor([0.040000, 0.080000, 0.120000])), ('w2.weight', tensor([[0.450000, 0.500000, 0.550000],
            [0.600000, 0.650000, 0.700000]])), ('w2.bias', tensor([0.160000, 0.200000]))])
optimizer.step()
print(net.state_dict())
    OrderedDict([('w1.weight', tensor([[0.151385, 0.202769],
            [0.251385, 0.302769],
            [0.350000, 0.400000]])), ('w1.bias', tensor([0.067695, 0.107695, 0.120000])), ('w2.weight', tensor([[0.437537, 0.477383, 0.550000],
            [0.612463, 0.672617, 0.700000]])), ('w2.bias', tensor([0.113842, 0.246158]))])
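As before, the update can be reproduced by hand once the dropout mask of this particular run is known (the third unit was dropped). A minimal NumPy sketch that matches the state_dict printed after optimizer.step():

import numpy as np

x  = np.array([0.05, 0.10])
W1 = np.array([[0.15, 0.20], [0.25, 0.30], [0.35, 0.40]])
b1 = np.array([0.04, 0.08, 0.12])
W2 = np.array([[0.45, 0.50, 0.55], [0.60, 0.65, 0.70]])
b2 = np.array([0.16, 0.20])
lr, alpha, p = 0.1, 0.1, 0.75
mask = np.array([1.0, 1.0, 0.0])       # dropout mask sampled in the run above (third unit dropped)

z1 = W1 @ x + b1
h  = np.where(z1 > 0, z1, alpha * z1)
d  = h * mask / (1 - p)                # [0.27, 0.49, 0.0]
o  = W2 @ d + b2
y_hat = np.exp(o) / np.exp(o).sum()

grad_o  = y_hat - np.array([0.0, 1.0])
grad_W2 = np.outer(grad_o, d)          # third column is zero -> third column of W2 is not updated
grad_d  = W2.T @ grad_o
grad_z1 = grad_d * mask / (1 - p) * np.where(z1 > 0, 1.0, alpha)
grad_W1 = np.outer(grad_z1, x)         # third row is zero -> third row of W1 is not updated

print(W1 - lr * grad_W1)   # [[0.151385, 0.202769], [0.251385, 0.302769], [0.350000, 0.400000]]
print(b1 - lr * grad_z1)   # [0.067695, 0.107695, 0.120000]
print(W2 - lr * grad_W2)   # [[0.437537, 0.477383, 0.550000], [0.612463, 0.672617, 0.700000]]
print(b2 - lr * grad_o)    # [0.113842, 0.246158]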