Example: SGD in a Neural Network and Its Backpropagation Process


Use a neural network to fit a simple curve (the target here is y = x² plus noise) and verify my own understanding of backpropagation.
The detailed derivation is at the end of the article.
For the chain rule, see https://zhuanlan.zhihu.com/p/97476874
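
Before the full example, here is a minimal sketch of what "verifying backpropagation" means (my own addition; the scalar variables below are illustrative and not part of the original code): for a single sample pushed through Linear → ReLU → Linear → squared error, the gradient produced by autograd should match the one written out with the chain rule.

import torch

# one scalar "neuron" per layer, so the chain rule can be written out by hand
w1 = torch.tensor(2.0, requires_grad=True)
b1 = torch.tensor(0.5, requires_grad=True)
w2 = torch.tensor(-1.0, requires_grad=True)
b2 = torch.tensor(0.1, requires_grad=True)
x, t = torch.tensor(1.5), torch.tensor(3.0)

y1 = w1 * x + b1        # hidden pre-activation
y2 = torch.relu(y1)     # hidden activation
y3 = w2 * y2 + b2       # prediction
loss = (y3 - t) ** 2    # squared error for this single sample
loss.backward()

# chain rule by hand: dL/dw1 = 2*(y3 - t) * w2 * [y1 > 0] * x
manual = 2 * (y3 - t) * w2 * (y1 > 0).float() * x
print(torch.allclose(w1.grad, manual.detach()))  # expected: True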

import torch
import torch.nn as nn
import torch.optim as optim
import math
import random
import numpy as np
import torch.nn.functional as F

# Fix the random seeds for reproducibility
def setup_seed(seed):
    torch.manual_seed(seed)  # seed the CPU RNG
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)  # seed the current GPU
        torch.cuda.manual_seed_all(seed)  # seed all GPUs when more than one is used
    random.seed(seed)
    np.random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

setup_seed(666)
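
# Note (my addition): with the seeds fixed above, re-running this script should
# reproduce the same initial weights, and therefore the same gradients and
# parameter updates recorded in the logs further down.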

class Model(nn.Module):
    def __init__(self, n_feature, n_hidden):
        super(Model, self).__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)   # hidden layer
        self.predict = torch.nn.Linear(n_hidden, n_feature)  # output layer

    def forward(self, x, target):
        y1 = self.hidden(x)
        # print(self.hidden.weight)
        # print(self.hidden.bias)
        # print(y1)
        y2 = F.relu(y1)  # ReLU activation on the hidden layer

        # print(y2)
        y3 = self.predict(y2)
        # print(y3)
        # print(self.predict.weight)
        # print(self.predict.bias)
        loss = MSE(y3, target)  # MSE is the nn.MSELoss() instance defined further down; it exists before forward() is first called
        return loss


# Training data
x_data = torch.linspace(-1, 1, 10)  # keep the dataset small so a run finishes quickly
x = torch.unsqueeze(x_data, dim=-1)
target = x.pow(2) + 0.2 * torch.rand(x.size())

# print(x)
# print(target)
model = Model(n_feature=1, n_hidden=10)
optimizer = optim.SGD(model.parameters(), lr=0.5, momentum=0.9)  # a fairly large learning rate, so the effect of each step is easy to see

MSE = nn.MSELoss()

# Training loop
for epoch in range(2):
    loss = model(x, target)
    # print(loss)

    optimizer.zero_grad()
    loss.backward()
    # print(optimizer.param_groups[0])
    res1 = [p for p in optimizer.param_groups[0]['params']]  # parameter tensor references (values before the update if printed here)
    # print(res1)
    # print("-----------------------------------------------------")
    res2 = [p.grad for p in optimizer.param_groups[0]['params']]  # gradients computed by backward()
    # print(res2)
    # print("-----------------------------------------------------")
    optimizer.step()
    res3 = [p for p in optimizer.param_groups[0]['params']]  # same tensor objects, now holding the updated values
    # print(res3)
    # print("-----------------------------------------------------")

    break  # stop after the first update to inspect a single step; remove this line to run both epochs
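
The snapshots res1/res2/res3 are where the logs below come from. One detail worth spelling out (my own note): optimizer.step() updates the parameter tensors in place, so to compare values before and after a step they must be cloned first. A minimal sketch of such a check, assuming a freshly constructed model and optimizer (i.e. run before any optimizer step, with the same x, target and MSE as above):

before = [p.detach().clone() for p in model.parameters()]  # clone: step() modifies parameters in place
loss = model(x, target)
optimizer.zero_grad()
loss.backward()
grads = [p.grad.detach().clone() for p in model.parameters()]
optimizer.step()
after = [p.detach().clone() for p in model.parameters()]

# On the very first step the momentum buffer is zero, so SGD with momentum
# reduces to a plain step: w_new = w_old - lr * grad
lr = optimizer.param_groups[0]['lr']
for w_old, g, w_new in zip(before, grads, after):
    print(torch.allclose(w_new, w_old - lr * g))  # expected: True on the first step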





'''
x = [[-1.0000], [-0.7778], [-0.5556], [-0.3333], [-0.1111], [ 0.1111], [ 0.3333], [ 0.5556], [ 0.7778], [ 1.0000]]


y1 = 
tensor([[ 0.0227, -0.3667, -0.7500,  0.7615, -0.2314, -0.4504, -0.3309, -0.9571,
          0.0028,  0.1209],
        [-0.0875, -0.3437, -0.5818,  0.7740, -0.2332, -0.2375, -0.2946, -0.9627,
         -0.1905,  0.2323],
        [-0.1977, -0.3206, -0.4136,  0.7865, -0.2349, -0.0245, -0.2583, -0.9682,
         -0.3838,  0.3437],
        [-0.3079, -0.2976, -0.2454,  0.7990, -0.2367,  0.1884, -0.2220, -0.9738,
         -0.5772,  0.4551],
        [-0.4182, -0.2746, -0.0772,  0.8115, -0.2384,  0.4014, -0.1856, -0.9794,
         -0.7705,  0.5665],
        [-0.5284, -0.2516,  0.0910,  0.8240, -0.2402,  0.6143, -0.1493, -0.9849,
         -0.9639,  0.6779],
        [-0.6386, -0.2286,  0.2592,  0.8365, -0.2419,  0.8273, -0.1130, -0.9905,
         -1.1572,  0.7893],
        [-0.7488, -0.2056,  0.4274,  0.8490, -0.2437,  1.0402, -0.0767, -0.9961,
         -1.3506,  0.9007],
        [-0.8590, -0.1826,  0.5956,  0.8615, -0.2454,  1.2532, -0.0404, -1.0017,
         -1.5439,  1.0121],
        [-0.9692, -0.1596,  0.7638,  0.8740, -0.2471,  1.4661, -0.0041, -1.0072,
         -1.7373,  1.1234]], grad_fn=<AddmmBackward>)

y2 = 
tensor([[0.0227, 0.0000, 0.0000, 0.7615, 0.0000, 0.0000, 0.0000, 0.0000, 0.0028,
         0.1209],
        [0.0000, 0.0000, 0.0000, 0.7740, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.2323],
        [0.0000, 0.0000, 0.0000, 0.7865, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.3437],
        [0.0000, 0.0000, 0.0000, 0.7990, 0.0000, 0.1884, 0.0000, 0.0000, 0.0000,
         0.4551],
        [0.0000, 0.0000, 0.0000, 0.8115, 0.0000, 0.4014, 0.0000, 0.0000, 0.0000,
         0.5665],
        [0.0000, 0.0000, 0.0910, 0.8240, 0.0000, 0.6143, 0.0000, 0.0000, 0.0000,
         0.6779],
        [0.0000, 0.0000, 0.2592, 0.8365, 0.0000, 0.8273, 0.0000, 0.0000, 0.0000,
         0.7893],
        [0.0000, 0.0000, 0.4274, 0.8490, 0.0000, 1.0402, 0.0000, 0.0000, 0.0000,
         0.9007],
        [0.0000, 0.0000, 0.5956, 0.8615, 0.0000, 1.2532, 0.0000, 0.0000, 0.0000,
         1.0121],
        [0.0000, 0.0000, 0.7638, 0.8740, 0.0000, 1.4661, 0.0000, 0.0000, 0.0000,
         1.1234]], grad_fn=<ReluBackward0>)

y3 = 
tensor([[ 0.1421],
        [ 0.1106],
        [ 0.0757],
        [ 0.0540],
        [ 0.0341],
        [ 0.0031],
        [-0.0372],
        [-0.0776],
        [-0.1179],
        [-0.1583]], grad_fn=<AddmmBackward>)

target = [[1.0624], [0.6590], [0.3310], [0.1313], [0.0499], [0.0160], [0.1774], [0.3256], [0.7196], [1.0016]]


'''
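
As a quick cross-check of the log above (my own addition, numbers copied from the printed y3 and target), nn.MSELoss() with its default reduction='mean' is simply the mean of the squared residuals:

import torch

y3 = torch.tensor([0.1421, 0.1106, 0.0757, 0.0540, 0.0341,
                   0.0031, -0.0372, -0.0776, -0.1179, -0.1583])
target = torch.tensor([1.0624, 0.6590, 0.3310, 0.1313, 0.0499,
                       0.0160, 0.1774, 0.3256, 0.7196, 1.0016])
print(((y3 - target) ** 2).mean())  # roughly 0.35 for these logged values

The log also shows that y2 is just y1 with every negative entry set to zero, which is exactly what F.relu does elementwise.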




'''
w1_weight_01 = 
tensor([[-0.4960],
        [ 0.1035],
        [ 0.7569],
        [ 0.0563],
        [-0.0078],
        [ 0.9583],
        [ 0.1634],
        [-0.0251],
        [-0.8700],
        [ 0.5013]])

w1_weight_grad =
tensor([[-0.0272],
        [ 0.0000],
        [ 0.0512],
        [ 0.0158],
        [ 0.0000],
        [-0.0294],
        [ 0.0000],
        [ 0.0000],
        [-0.0113],
        [ 0.0354]])

w1_weight_02 = 
tensor([[-0.4824],
        [ 0.1035],
        [ 0.7313],
        [ 0.0484],
        [-0.0078],
        [ 0.9730],
        [ 0.1634],
        [-0.0251],
        [-0.8644],
        [ 0.4836]])
'''
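
These numbers already show the first update directly (my own arithmetic, using the values logged above): the momentum buffer starts at zero, so each element simply moves by -lr * grad with lr = 0.5. For example:

lr = 0.5
print(-0.4960 - lr * (-0.0272))  # -0.4824, matches w1_weight_02[0]
print( 0.7569 - lr * ( 0.0512))  #  0.7313, matches w1_weight_02[2]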




'''



w2_weight_01 =
tensor([[-0.1475, -0.2782, -0.1215, -0.1338,  0.2476,  0.0707, -0.0606, -0.0268,
         -0.0613, -0.2990]])

w2_weight_grad (step 1) =
tensor([[-4.1780e-03,  0.0000e+00, -3.2279e-01, -7.3368e-01,  0.0000e+00,
         -6.7517e-01,  0.0000e+00,  0.0000e+00, -5.2251e-04, -6.1250e-01]])

w2_weight_02 =
tensor([[-0.1454, -0.2782,  0.0399,  0.2330,  0.2476,  0.4083, -0.0606, -0.0268,
         -0.0610,  0.0072]])

w2_weight_grad (step 2) =
tensor([[0.0000, 0.0000, 0.3009, 1.0942, 0.0000, 1.0365, 0.0000, 0.0000, 0.0000,
         0.8401]])

w2_weight_03 =
tensor([[-0.1436, -0.2782,  0.0347,  0.0161,  0.2476,  0.1939, -0.0606, -0.0268,
         -0.0608, -0.1372]])



Update rule (SGD with momentum; here w1/w2 denote a parameter before/after one step, not the two layers):
w2 = w1 - lr * v1
v1 = v0 * momentum + w1_grad
v0 = 0
'''
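
Replaying this rule on the logged numbers for the first element of w2 reproduces both recorded steps (my own check; the last-digit discrepancy comes from the 4-decimal rounding of the logged values):

lr, momentum = 0.5, 0.9

w = -0.1475   # w2_weight_01[0]
v = 0.0       # momentum buffer starts at zero

v = momentum * v + (-4.1780e-03)   # gradient at step 1
w = w - lr * v
print(w)      # -0.1454..., matches w2_weight_02[0]

v = momentum * v + 0.0             # gradient at step 2 is zero for this element
w = w - lr * v
print(w)      # -0.1435..., matches w2_weight_03[0] up to rounding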

[Three images in the original post: the hand-worked backpropagation derivation referred to at the beginning of the article.]
