2 Neural Network Learning: Three Improvements to a Hand-Written NumPy Neural Network

1 First improvement: rewriting in torch syntax

1.1 Initialization

import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
import pandas as pd
import random, math

import sklearn
import scipy

# Dimensions: batch size, input dim, hidden dim, output dim
N, D_in, H, D_out = 64, 1000, 100, 10

# Initialize input, target, and weight tensors
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

w1 = torch.randn(D_in, H)
w2 = torch.randn(H, D_out)

# Learning rate
learning_rate = 1e-6

1.2 Core loop

for i in range(6000):
    # Forward pass
    h = x.mm(w1) # N * H
    h_relu = h.clamp(min = 0) # N * H
    y_pred = h_relu.mm(w2) # N * D_out
    
    # loss
    loss = (y_pred - y).pow(2).sum().item() # .item() pulls the Python scalar out of a one-element tensor
    if i % 300 == 0:
        print(i, loss, end = '||')
    
    # backward pass
    grad_y_pred = 2 * (y_pred - y) # N * D_out
    grad_w2 = h_relu.t().mm(grad_y_pred) # multiplying an N * H matrix by an N * D_out matrix, so the first must be transposed; result is H * D_out
    grad_h_relu = grad_y_pred.mm(w2.t()) # N * H
    grad_h = grad_h_relu.clone()
    grad_h[h<0] = 0 # N * H
    grad_w1 = x.t().mm(grad_h) # D_in * H: the gradient must have the same shape as w1 so it can be subtracted from it (a sanity check against autograd follows this loop)
    
    # update weights of w1 and w2
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
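
The transpose bookkeeping in the backward pass is easy to get wrong, so a useful habit is to check the hand-derived gradients against autograd on a tiny problem. Below is a minimal sanity-check sketch (the small sizes are my own choice, not part of the original code):

import torch

# tiny sizes so the check runs instantly
N, D_in, H, D_out = 4, 6, 5, 3
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)

# autograd reference
h = x.mm(w1)
h_relu = h.clamp(min=0)
y_pred = h_relu.mm(w2)
loss = (y_pred - y).pow(2).sum()
loss.backward()

# hand-derived gradients, same formulas as in the loop above
with torch.no_grad():
    grad_y_pred = 2 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h = grad_y_pred.mm(w2.t())
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)

print(torch.allclose(grad_w1, w1.grad), torch.allclose(grad_w2, w2.grad)) # expected: True True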

2 Second improvement: using autograd

2.1 Initialization

import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
import pandas as pd
import random, math

import sklearn
import scipy

# Dimensions: batch size, input dim, hidden dim, output dim
N, D_in, H, D_out = 64, 1000, 100, 10

# Initialize input, target, and weight tensors
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

w1 = torch.randn(D_in, H, requires_grad=True) # requires_grad must be set so autograd tracks these weights
w2 = torch.randn(H, D_out, requires_grad=True)

# Learning rate
learning_rate = 1e-6

2.2 Core loop

for i in range(6000):
    # Forward pass
    y_pred = x.mm(w1).clamp(min=0).mm(w2) # N * D_out
#     h_relu = h.clamp(min = 0) # N * H
#     y_pred = h_relu.mm(w2) # N * D_out
    
    # loss
    loss = (y_pred - y).pow(2).sum() # keep loss as a tensor so its computation graph can be used by backward(); call .item() only for printing
    if i % 300 == 0:
        print(i, loss.item(), )
    
    # backward pass
#     grad_y_pred = 2 * (y_pred - y) # N * D_out
#     grad_w2 = h_relu.t().mm(grad_y_pred) # first factor transposed so the shapes line up; result is H * D_out
#     grad_h_relu = grad_y_pred.mm(w2.t()) # N * H
#     grad_h = grad_h_relu.clone()
#     grad_h[h<0] = 0 # N * H
#     grad_w1 = x.t().mm(grad_h) # D_in * H, matching w1's shape
    loss.backward()
    
    # update weights of w1 and w2
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad
#         if i % 300 == 0:
#             print(w1.grad, w2.grad)
        w1.grad.zero_() # without these two lines the gradients accumulate across iterations instead of being reset (see the demo after this loop)
        w2.grad.zero_()
#         if i % 300 == 0:
#             print(w1.grad, w2.grad) # every gradient tensor is now zero
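
The reason the two zero_() calls are needed is that PyTorch accumulates gradients into .grad across successive backward() calls rather than overwriting them. A minimal illustration (the toy tensor below is just for demonstration):

import torch

w = torch.randn(3, requires_grad=True)
for step in range(3):
    loss = (2 * w).sum()  # d(loss)/dw = 2 for every element
    loss.backward()
    print(step, w.grad)   # without zeroing: 2., then 4., then 6. -- the gradients pile up
    # w.grad.zero_()      # uncomment to get the correct gradient (2.) at every step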

3 Third improvement: using torch.nn

3.1 Initialization

import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
import pandas as pd
import random, math

import sklearn
import scipy

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in) # to run on a GPU, append .cuda() or .to(device)
y = torch.randn(N, D_out)
# no hand-defined w1 and w2 any more

# define the network as a sequential stack of layers
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H, bias=False), # computes x @ W1^T (bias disabled)
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out, bias=False)
)

# These two lines are optional; empirically, with the same learning rate the model
# converges noticeably faster when the weights are re-initialized from a standard normal distribution (see the sketch after this block)
torch.nn.init.normal_(model[0].weight)
torch.nn.init.normal_(model[2].weight)

# model = model.cuda() # run the model on the GPU
# instantiate a mean-squared-error loss that sums over all elements
loss_fn = nn.MSELoss(reduction='sum')
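
For context on the initialization trick above: nn.Linear's default weight initialization draws from a uniform distribution whose scale shrinks with the number of input features, so re-initializing with normal_ gives much larger weights (and larger initial gradients) when D_in = 1000. A quick inspection sketch, separate from the training code:

import torch

D_in, H = 1000, 100
layer = torch.nn.Linear(D_in, H, bias=False)
print('default init std:', layer.weight.std().item())  # small, on the order of 1/sqrt(D_in)

torch.nn.init.normal_(layer.weight)
print('after normal_ std:', layer.weight.std().item()) # close to 1.0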

3.2 Core loop: a qualitative leap!

learning_rate = 1e-7
for i in range(20001):
    # forward pass
    y_pred = model(x)
    
    # loss
    loss = loss_fn(y_pred, y) # loss is a tensor attached to the computation graph
    if i % 2000 == 0:
        print(i, loss.item())
    
    # backward pass
    loss.backward()
    
    # update w1, w2
    with torch.no_grad():
        for param in model.parameters(): # each param is a tensor; its gradient lives in param.grad (see the note after this loop)
            param -= learning_rate * param.grad
            
    model.zero_grad() # reset all gradients for the next iteration
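
One detail worth noting: model.parameters() yields the two Linear weight tensors, and nn.Linear stores each weight as (out_features, in_features), i.e. transposed relative to the hand-written w1 and w2. A short inspection of the model defined above:

for name, param in model.named_parameters():
    # '0.weight' has shape (H, D_in) and '2.weight' has shape (D_out, H),
    # the transposes of the manual w1 (D_in, H) and w2 (H, D_out)
    print(name, tuple(param.shape))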