Version 1.0
import torch
dtype = torch.float
#device = torch.device("cpu")
device = torch.device("cuda:0") # Uncomment this to run on GPU
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1, 100, 1
# learning rate
LR = 1e-5
# Create random inputs and the corresponding targets (the target mapping is y = 10x)
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = x * 10
# Randomly initialize weights; requires_grad=True lets autograd track them
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)
for t in range(500):
"""
h = x.mm(w1)
#ReLu Acticivation Function
h_relu = h.clamp(min=0)
pred_y = h_relu.mm(w2)
#calculate the loss
loss = (pred_y - y).pow(2).sum().item()
print(t,loss)
Manually update weights using gradient descent
#Backpropagation
grad_y = (pred_y - y)*2
grad_w2 = h_relu.t().mm(grad_y)
grad_h_relu = grad_y.mm(w2.t())#这行原理未知,可能是求逆运算
grad_h = grad_h_relu.clone()
#ReLU:if x<0 :x=0 so grad_x = 0
grad_h[h<0] = 0
grad_w1 = x.t().mm(grad_h)
# Update weights using gradient descent
w1 -= LR * grad_w1
w2 -= LR * grad_w2
"""
    # Forward pass, loss, and backward pass via autograd
    pred_y = x.mm(w1).clamp(min=0).mm(w2)
    loss = (pred_y - y).pow(2).sum()
    print(t, loss.item())
    loss.backward()
    # Update weights by hand; no_grad() keeps the updates out of the autograd graph
    with torch.no_grad():
        w1 -= LR * w1.grad
        w2 -= LR * w2.grad
        # Manually zero the gradients after updating weights
        w1.grad.zero_()
        w2.grad.zero_()
# Show the result
print(w1)
print(w2)
print(w1.mm(w2))
# Test on fresh random inputs
test_x = torch.randn(N, D_in, device=device, dtype=dtype)
h = test_x.mm(w1)
h_relu = h.clamp(min=0)
pred_y = h_relu.mm(w2)
print(pred_y)
print(test_x)
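As a sanity check (my addition, not part of the original script), the short sketch below verifies that the manual gradient formulas from the commented-out block above match what loss.backward() computes via autograd; the shapes and the seed are arbitrary.

import torch

torch.manual_seed(0)
N, D_in, H, D_out = 4, 1, 5, 1
x = torch.randn(N, D_in)
y = x * 10
w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)

# Forward pass, then let autograd compute w1.grad and w2.grad
h = x.mm(w1)
h_relu = h.clamp(min=0)
pred_y = h_relu.mm(w2)
loss = (pred_y - y).pow(2).sum()
loss.backward()

# Manual backward pass, using the same chain-rule formulas as the docstring block
with torch.no_grad():
    grad_y = 2 * (pred_y - y)
    grad_w2 = h_relu.t().mm(grad_y)
    grad_h = grad_y.mm(w2.t())
    grad_h[h < 0] = 0  # ReLU: no gradient where the pre-activation was negative
    grad_w1 = x.t().mm(grad_h)

print(torch.allclose(w1.grad, grad_w1))  # expected: True
print(torch.allclose(w2.grad, grad_w2))  # expected: True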
Version 2.0
import torch
import torch.nn as nn
import random
#device = torch.device("cpu")
device = torch.device("cuda:0") # Uncomment this to run on GPU
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1, 100, 1
# learning rate
LR = 1e-4
# Create random inputs and the corresponding targets (the target mapping is y = 10x)
x = torch.randn(N, D_in, device=device)
y = x * 10
class MyLinearRegression(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(MyLinearRegression, self).__init__()
        self.input_layer = nn.Linear(D_in, H).to(device)
        self.middle_layer = nn.Linear(H, H).to(device)
        self.output_layer = nn.Linear(H, D_out).to(device)

    def forward(self, x):
        x = x.to(device)
        h_relu = self.input_layer(x).clamp(min=0)
        # Reuse the middle layer a random number of times (0-3); because the graph
        # is built dynamically, each forward pass can have a different depth
        for _ in range(random.randint(0, 3)):
            h_relu = self.middle_layer(h_relu).clamp(min=0)
        y_pred = self.output_layer(h_relu)
        return y_pred
model = MyLinearRegression(D_in, H, D_out)
criterion = nn.MSELoss(reduction='sum')  # sum of squared errors; size_average=False is deprecated
optimizer = torch.optim.SGD(model.parameters(), lr=LR)
for t in range(500):
    pred_y = model(x)
    loss = criterion(pred_y, y)
    print(t, loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
# Test: compare predictions with the target mapping y = 10x
test_x = torch.randn(N, D_in, device=device)
pred_y = model(test_x)
print(pred_y - 10 * test_x)
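For comparison, here is a minimal, self-contained sketch (my addition) of the same fit using a fixed-depth torch.nn.Sequential model instead of the custom MyLinearRegression class; the shapes and learning rate match the script above, but every forward pass has the same depth, so there is no randomly repeated middle layer.

import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
N, D_in, H, D_out, LR = 64, 1, 100, 1, 1e-4
x = torch.randn(N, D_in, device=device)
y = x * 10

seq_model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
).to(device)
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(seq_model.parameters(), lr=LR)

for t in range(500):
    loss = criterion(seq_model(x), y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Inference without tracking gradients; residuals shrink as the fit improves
with torch.no_grad():
    test_x = torch.randn(N, D_in, device=device)
    print(seq_model(test_x) - 10 * test_x)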