Implementing a two-layer neural network with NumPy


1. Implementing a two-layer neural network with NumPy

import numpy as np

N, D_in, H, D_out = 64, 1000, 100, 10  # batch size, input dim, hidden dim, output dim
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for it in range(500):
    # forward pass
    h = x.dot(w1)
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2)

    # compute loss
    loss = np.square(y_pred-y).sum()
    print(it, loss)

    # backward pass: compute gradients
    grad_y_pred = 2.0*(y_pred-y)  # note: write 2.0 as a float
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h<0] = 0
    grad_w1 = x.T.dot(grad_h)

    w1 = w1-learning_rate*grad_w1
    w2 = w2-learning_rate*grad_w2
    
# final forward pass with the trained weights to inspect the predictions
h = x.dot(w1)
h_relu = np.maximum(h, 0)
y_pred = h_relu.dot(w2)
y_pred
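
The backward-pass lines above just apply the chain rule to loss = sum((y_pred - y)**2): grad_y_pred = 2*(y_pred - y), grad_w2 = h_relu.T · grad_y_pred, and grad_w1 = x.T · grad_h with the ReLU mask. If you want to convince yourself the formulas are right, a quick finite-difference check works. The sketch below is my own addition (not from the original post); it uses tiny shapes and its own copies of x, y, w1, w2, so run it on its own rather than pasting it into the loop above.

import numpy as np

np.random.seed(0)
N, D_in, H, D_out = 4, 5, 3, 2  # tiny sizes just for the check
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

def loss_of(w1, w2):
    h_relu = np.maximum(x.dot(w1), 0)
    return np.square(h_relu.dot(w2) - y).sum()

# analytic gradient w.r.t. w2, same formula as in the training loop
h_relu = np.maximum(x.dot(w1), 0)
grad_w2 = h_relu.T.dot(2.0 * (h_relu.dot(w2) - y))

# numerical gradient for one entry of w2
eps = 1e-6
w2_plus, w2_minus = w2.copy(), w2.copy()
w2_plus[0, 0] += eps
w2_minus[0, 0] -= eps
numerical = (loss_of(w1, w2_plus) - loss_of(w1, w2_minus)) / (2 * eps)
print(grad_w2[0, 0], numerical)  # the two values should agree to several digits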

2. PyTorch implementations

2.1 Method 1: PyTorch tensors with hand-written gradients

import torch
N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

w1 = torch.randn(D_in, H)
w2 = torch.randn(H, D_out)

learning_rate = 1e-6
for it in range(500):
    # forward pass
    h = x.mm(w1)
    h_relu = h.clamp(min=0)
    y_pred = h_relu.mm(w2)

    # compute loss
    loss = (y_pred-y).pow(2).sum().item()
    print(it, loss)

    # backward pass: compute gradients
    grad_y_pred = 2.0*(y_pred-y)  # note: write 2.0 as a float
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h<0] = 0
    grad_w1 = x.t().mm(grad_h)

    w1 = w1-learning_rate*grad_w1
    w2 = w2-learning_rate*grad_w2

    

2.2 Method 2: autograd

import torch

N, D_in, H, D_out = 64, 1000, 100, 10

x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)

learning_rate = 1e-6
for it in range(500):
    # forward pass
    y_pred = x.mm(w1).clamp(min=0).mm(w2)

    # compute loss
    loss = (y_pred-y).pow(2).sum() # computation graph
    print(it, loss.item())

    # backward pass: autograd computes the gradients
    loss.backward()
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad
        w1.grad.zero_()
        w2.grad.zero_()
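
Two details in this loop are easy to miss: the weight update has to happen inside torch.no_grad() so it is not recorded in the computation graph, and the gradients have to be zeroed by hand because backward() accumulates into .grad instead of overwriting it. A minimal sketch of that accumulation behaviour (my addition, not from the original post):

import torch

w = torch.ones(1, requires_grad=True)
(w * 3).sum().backward()
print(w.grad)              # tensor([3.])
(w * 3).sum().backward()   # without zeroing, the new gradient is added to the old one
print(w.grad)              # tensor([6.])
w.grad.zero_()
print(w.grad)              # tensor([0.])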

2.3 Method 3: nn.Sequential

import torch
import torch.nn as nn

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

model = nn.Sequential(
    nn.Linear(D_in, H),
    nn.ReLU(),
    nn.Linear(H, D_out)
)

# If training does not converge well, you can change the weight initialization;
# print the model to inspect it. With this initialization the results are good.
# Without changing the initial values, learning_rate=1e-4 also works well.
torch.nn.init.normal_(model[0].weight)
torch.nn.init.normal_(model[2].weight)

loss_fn = nn.MSELoss(reduction='sum')  # reduction='sum' matches the hand-written sum-of-squares loss

learning_rate=1e-6
for it in range(500):
    y_pred = model(x)
    
    loss = loss_fn(y_pred, y)
    print(it, loss.item())
    model.zero_grad()
    loss.backward()
    
    with torch.no_grad():
        for param in model.parameters():  # note: parameters() needs its parentheses
            param -= learning_rate*param.grad


model

Output:

Sequential(
  (0): Linear(in_features=1000, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=10, bias=True)
)
model[0].weight

Output:

Parameter containing:
tensor([[-0.0266, -0.0164,  0.0012,  ...,  0.0234, -0.0326,  0.0072],
        [ 0.0200,  0.0349,  0.0044,  ..., -0.0209, -0.0144,  0.0198],
        [-0.0111,  0.0187, -0.0287,  ...,  0.0016, -0.0272, -0.0213],
        ...,
        [ 0.0111,  0.0292,  0.0020,  ...,  0.0065, -0.0280, -0.0190],
        [ 0.0114,  0.0102, -0.0292,  ..., -0.0309,  0.0108,  0.0305],
        [-0.0181, -0.0353,  0.0144,  ..., -0.0284, -0.0189,  0.0016]],
       requires_grad=True)

2.4 Method 4: nn.Sequential with an optimizer (Adam)

  • Adam's learning rate is usually set in the range 1e-3 to 1e-4.

import torch
import torch.nn as nn

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

model = nn.Sequential(
    nn.Linear(D_in, H),
    nn.ReLU(),
    nn.Linear(H, D_out)
)

# If training does not converge well, you can change the weight initialization
# and print the model to inspect it. Without re-initializing the weights,
# learning_rate=1e-4 still gives good results.
# torch.nn.init.normal_(model[0].weight)
# torch.nn.init.normal_(model[2].weight)

loss_fn = nn.MSELoss(reduction='sum')
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
for it in range(500):
    y_pred = model(x)
    
    loss = loss_fn(y_pred, y)
    print(it, loss.item())
    optimizer.zero_grad()
#     model.zero_grad()
    loss.backward()
    
    # update model parameters
    optimizer.step()



2.5 Method 5: a custom nn.Module

import torch
import torch.nn as nn

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

class TwoLayerNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):  # define the model architecture
        super(TwoLayerNet, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, H)  # a trailing comma here would make this a tuple and raise an error
        self.linear2 = torch.nn.Linear(H, D_out)
    def forward(self, x):
        y_pred = self.linear2(self.linear1(x).clamp(min=0))
        return y_pred

model = TwoLayerNet(D_in, H, D_out)

loss_fn = nn.MSELoss(reduction='sum')
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
for it in range(500):
    y_pred = model(x)
    
    loss = loss_fn(y_pred, y)
    print(it, loss.item())
    optimizer.zero_grad()
#     model.zero_grad()
    loss.backward()
    
    # update model parameters
    optimizer.step()
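
To mirror the final prediction check at the end of the NumPy version, you can run the trained model once more without tracking gradients. A short sketch (my addition, not from the original post), reusing model, x and y from the code above:

model.eval()  # no dropout/batchnorm here, but switching to eval mode is a good habit
with torch.no_grad():
    y_pred = model(x)
    print((y_pred - y).pow(2).sum().item())  # final sum-of-squares loss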
