As a PyTorch beginner, I am currently following this basic course; here I record some notes and takeaways along the way~
Overview
A basic model training pipeline consists of four steps:
- Prediction
- Loss Computation
- Gradients Computation
- Parameter Updates
The following sections are arranged as follows:
| Section | Prediction | Loss Computation | Gradients Computation | Parameter Updates |
| --- | --- | --- | --- | --- |
| 1 | Manually | Manually | Manually | Manually |
| 2 | Manually | Manually | Autograd | Manually |
| 3 | Manually | PyTorch Loss | Autograd | PyTorch Optimizer |
| 4 | PyTorch Model | PyTorch Loss | Autograd | PyTorch Optimizer |
Sections 1~4 build on one another, each one modifying the previous, so a '(*)' marker is placed in the code blocks next to whatever has changed. Section 5 presents a basic structure for a custom model.
Section 1
```python
import torch

# True function: y = 3 * x
# Training data
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([3, 6, 9, 12], dtype=torch.float32)
x_test = 5.0  # test data

w = torch.tensor(0.0)  # parameter initialization

# Hyperparameters
learning_rate = 0.01
n_iters = 100

# Prediction
def forward(x):
    return w * x

# Loss -> MSE loss
def loss(y, y_pred):
    return ((y - y_pred) ** 2).mean()

# Gradients -> dl/dw = -2 * x * (y - y_pred), averaged over the samples
def grad(x, y, y_pred):
    return torch.mul(-2 * x, y - y_pred).mean()

print(f"Prediction before training: f({x_test}) = {forward(x_test): .3f}")

# Training
for epoch in range(n_iters):
    # Forward pass
    Y_pred = forward(X)
    # Loss
    l = loss(Y, Y_pred)
    # Gradient
    dw = grad(X, Y, Y_pred)
    # Parameter update
    w -= learning_rate * dw

    if epoch % 10 == 0:
        print(f"Iteration {epoch + 1}: w = {w.item(): .3f}, loss = {l.item(): .8f}")

print(f"Prediction after training: f({x_test}) = {forward(x_test): .3f}")
```
Section 2
```python
import torch

# True function: y = 3 * x
# Training data
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([3, 6, 9, 12], dtype=torch.float32)
x_test = 5.0  # test data

w = torch.tensor(0.0, requires_grad=True)  # parameter initialization (*)

# Hyperparameters
learning_rate = 0.01
n_iters = 100

# Prediction
def forward(x):
    return w * x

# Loss -> MSE loss
def loss(y, y_pred):
    return ((y - y_pred) ** 2).mean()

# Gradients -> Autograd (*)

print(f"Prediction before training: f({x_test}) = {forward(x_test): .3f}")

# Training
for epoch in range(n_iters):
    # Forward pass
    Y_pred = forward(X)
    # Loss
    l = loss(Y, Y_pred)
    # Gradient (*)
    l.backward()
    dw = w.grad
    # Parameter update (*)
    with torch.no_grad():
        w -= learning_rate * dw
    # Set gradients to zero to prevent accumulation over the iterations (*)
    w.grad.zero_()

    if epoch % 10 == 0:
        print(f"Iteration {epoch + 1}: w = {w.item(): .3f}, loss = {l.item(): .8f}")

print(f"Prediction after training: f({x_test}) = {forward(x_test): .3f}")
```
Section 3
```python
import torch
import torch.nn as nn  # (*)

# True function: y = 3 * x
# Training data
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([3, 6, 9, 12], dtype=torch.float32)
x_test = 5.0  # test data

w = torch.tensor(0.0, requires_grad=True)  # parameter initialization

# Hyperparameters
learning_rate = 0.01
n_iters = 100

# Prediction
def forward(x):
    return w * x

# Loss -> PyTorch MSE loss (*)
loss = nn.MSELoss()

# Gradients -> Autograd
# Optimizer -> Plain SGD (*)
optimizer = torch.optim.SGD([w], lr=learning_rate)
# NOTE: The optimizer expects an iterable of parameters, hence the brackets around w

print(f"Prediction before training: f({x_test}) = {forward(x_test): .3f}")

# Training
for epoch in range(n_iters):
    # Forward pass
    Y_pred = forward(X)
    # Loss
    l = loss(Y, Y_pred)
    # Gradient
    l.backward()
    # Parameter update (*)
    optimizer.step()
    # Set gradients to zero to prevent accumulation over the iterations (*)
    optimizer.zero_grad()

    if epoch % 10 == 0:
        print(f"Iteration {epoch + 1}: w = {w.item(): .3f}, loss = {l.item(): .8f}")

print(f"Prediction after training: f({x_test}) = {forward(x_test): .3f}")
```
Section 4
```python
import torch
import torch.nn as nn

# True function: y = 3 * x
# Training data (*)
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
Y = torch.tensor([[3], [6], [9], [12]], dtype=torch.float32)
x_test = torch.tensor([5.0])  # test data

# No manual parameter initialization needed (*)

# Hyperparameters
learning_rate = 0.01
n_iters = 100

# Prediction -> PyTorch linear layer (*)
n_samples, n_features = X.size()
input_size = n_features
output_size = n_features
model = nn.Linear(input_size, output_size, bias=False)
# Arguments: input size (int), output size (int), bias (bool)

# Loss -> PyTorch MSE loss
loss = nn.MSELoss()

# Gradients -> Autograd
# Optimizer -> Plain SGD
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

print(f"Prediction before training: f({x_test.item()}) = {model(x_test).item(): .3f}")  # (*)

# Training
for epoch in range(n_iters):
    # Forward pass
    Y_pred = model(X)
    # Loss
    l = loss(Y, Y_pred)
    # Gradient
    l.backward()
    # Parameter update
    optimizer.step()
    # Set gradients to zero to prevent accumulation over the iterations
    optimizer.zero_grad()

    if epoch % 10 == 0:  # (*)
        [w] = model.parameters()
        # NOTE: The brackets unpack the single weight tensor from the parameter generator
        print(f"Iteration {epoch + 1}: w = {w.item(): .3f}, loss = {l.item(): .8f}")

print(f"Prediction after training: f({x_test.item()}) = {model(x_test).item(): .3f}")  # (*)
```
Section 5
The code below is transcribed directly from this video:
```python
import torch.nn as nn

class LinearRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegression, self).__init__()
        # Define layers
        self.lin = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.lin(x)

# Use:
# model = LinearRegression(input_size, output_size)
```
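To see the class in action, it can be dropped straight into the Section 4 pipeline. A minimal sketch of my own, reusing the class defined above with the same data and hyperparameters:

```python
import torch
import torch.nn as nn

X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
Y = torch.tensor([[3], [6], [9], [12]], dtype=torch.float32)

model = LinearRegression(input_dim=1, output_dim=1)
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(100):
    l = loss(model(X), Y)
    l.backward()
    optimizer.step()
    optimizer.zero_grad()

print(model(torch.tensor([5.0])))  # should approach tensor([15.])
```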