Version 1.0
import torch
dtype = torch.float
#device = torch.device("cpu")
device = torch.device("cuda:0") # Uncomment this to run on GPU
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1, 100, 1
# learning rate
LR = 1e-5
# Create random inputs and the corresponding targets (the target mapping is y = 10x)
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = x * 10
# Randomly initialize weights; requires_grad=True lets autograd track them
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)
for t in range(500):
"""
h = x.mm(w1)
#ReLu Acticivation Function
h_relu = h.clamp(min=0)
pred_y = h_relu.mm(w2)
#calculate the loss
loss = (pred_y - y).pow(2).sum().item()
print(t,loss)
Manually update weights using gradient descent
#Backpropagation
grad_y = (pred_y - y)*2
grad_w2 = h_relu.t().mm(grad_y)
grad_h_relu = grad_y.mm(w2.t())#这行原理未知,可能是求逆运算
grad_h = grad_h_relu.clone()
#ReLU:if x<0 :x=0 so grad_x = 0
grad_h[h<0] = 0
grad_w1 = x.t().mm(grad_h)
# Update weights using gradient descent
w1 -= LR * grad_w1
w2 -= LR * grad_w2
"""
    # Forward pass, loss, and backward pass via autograd
    pred_y = x.mm(w1).clamp(min=0).mm(w2)
    loss = (pred_y - y).pow(2).sum()
    print(t, loss.item())
    loss.backward()
    # Update weights by hand; no_grad() keeps the updates out of the autograd graph
    with torch.no_grad():
        w1 -= LR * w1.grad
        w2 -= LR * w2.grad
        # Manually zero the gradients after updating weights
        w1.grad.zero_()
        w2.grad.zero_()
# Show the result
print(w1)
print(w2)
print(w1.mm(w2))
# Test on fresh random inputs
test_x = torch.randn(N, D_in, device=device, dtype=dtype)
h = test_x.mm(w1)
h_relu = h.clamp(min=0)
pred_y = h_relu.mm(w2)
print(pred_y)
print(test_x)
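As a sanity check (my addition, not part of the original script), the short sketch below verifies that the manual gradient formulas from the commented-out block above match what loss.backward() computes via autograd; the shapes and the seed are arbitrary.

import torch

torch.manual_seed(0)
N, D_in, H, D_out = 4, 1, 5, 1
x = torch.randn(N, D_in)
y = x * 10
w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)

# Forward pass, then let autograd compute w1.grad and w2.grad
h = x.mm(w1)
h_relu = h.clamp(min=0)
pred_y = h_relu.mm(w2)
loss = (pred_y - y).pow(2).sum()
loss.backward()

# Manual backward pass, using the same chain-rule formulas as the docstring block
with torch.no_grad():
    grad_y = 2 * (pred_y - y)
    grad_w2 = h_relu.t().mm(grad_y)
    grad_h = grad_y.mm(w2.t())
    grad_h[h < 0] = 0  # ReLU: no gradient where the pre-activation was negative
    grad_w1 = x.t().mm(grad_h)

print(torch.allclose(w1.grad, grad_w1))  # expected: True
print(torch.allclose(w2.grad, grad_w2))  # expected: True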
Version 2.0
import torch
import torch.nn as nn
import random
#device = torch.device("cpu")
device = torch.device("cuda:0") # Uncomment this to run on GPU
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1, 100, 1
# learning rate
LR = 1e-4
# Create random inputs and the corresponding targets (the target mapping is y = 10x)
x = torch.randn(N, D_in, device=device)
y = x * 10
class MyLinearRegression(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(MyLinearRegression, self).__init__()
        self.input_layer = nn.Linear(D_in, H).to(device)
        self.middle_layer = nn.Linear(H, H).to(device)
        self.output_layer = nn.Linear(H, D_out).to(device)

    def forward(self, x):
        x = x.to(device)
        h_relu = self.input_layer(x).clamp(min=0)
        # Reuse the middle layer a random number of times (0-3); because the graph
        # is built dynamically, each forward pass can have a different depth
        for _ in range(random.randint(0, 3)):
            h_relu = self.middle_layer(h_relu).clamp(min=0)
        y_pred = self.output_layer(h_relu)
        return y_pred
model = MyLinearRegression(D_in, H, D_out)
criterion = nn.MSELoss(reduction='sum')  # sum of squared errors; size_average=False is deprecated
optimizer = torch.optim.SGD(model.parameters(), lr=LR)
for t in range(500):
    pred_y = model(x)
    loss = criterion(pred_y, y)
    print(t, loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
# Test: compare predictions with the target mapping y = 10x
test_x = torch.randn(N, D_in, device=device)
pred_y = model(test_x)
print(pred_y - 10 * test_x)
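For comparison, here is a minimal, self-contained sketch (my addition) of the same fit using a fixed-depth torch.nn.Sequential model instead of the custom MyLinearRegression class; the shapes and learning rate match the script above, but every forward pass has the same depth, so there is no randomly repeated middle layer.

import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
N, D_in, H, D_out, LR = 64, 1, 100, 1, 1e-4
x = torch.randn(N, D_in, device=device)
y = x * 10

seq_model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
).to(device)
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(seq_model.parameters(), lr=LR)

for t in range(500):
    loss = criterion(seq_model(x), y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Inference without tracking gradients; residuals shrink as the fit improves
with torch.no_grad():
    test_x = torch.randn(N, D_in, device=device)
    print(seq_model(test_x) - 10 * test_x)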