PyTorch:张量 与 Autograd
这里我们准备一个三阶多项式,通过最小化平方欧几里得距离来训练,并预测函数
y = sin(x)
在-pi
到pi
上的值。
此实现使用了 PyTorch 张量(tensor)运算来实现前向传播,并使用 PyTorch Autograd 来计算梯度。
PyTorch 张量表示计算图中的一个节点。 如果x
是一个张量,且x.requires_grad=True
,则x.grad
是另一个张量,它保存了x
相对于某个标量值的梯度。
import torch import math dtype = torch.float device = torch.device("cpu") # device = torch.device("cuda:0") # Uncomment this to run on GPU # Create Tensors to hold input and outputs. # By default, requires_grad=False, which indicates that we do not need to # compute gradients with respect to these Tensors during the backward pass. x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype) y = torch.sin(x) # Create random Tensors for weights. For a third order polynomial, we need # 4 weights: y = a + b x + c x^2 + d x^3 # Setting requires_grad=True indicates that we want to compute gradients with # respect to these Tensors during the backward pass. a = torch.randn((), device=device, dtype=dtype, requires_grad=True) b = torch.randn((), device=device, dtype=dtype, requires_grad=True) c = torch.randn((), device=device, dtype=dtype, requires_grad=True) d = torch.randn((), device=device, dtype=dtype, requires_grad=True) learning_rate = 1e-6 for t in range(2000): # Forward pass: compute predicted y using operations on Tensors. y_pred = a + b * x + c * x ** 2 + d * x ** 3 # Compute and print loss using operations on Tensors. # Now loss is a Tensor of shape (1,) # loss.item() gets the scalar value held in the loss. loss = (y_pred - y).pow(2).sum() if t % 100 == 99: print(t, loss.item()) # Use autograd to compute the backward pass. This call will compute the # gradient of loss with respect to all Tensors with requires_grad=True. # After this call a.grad, b.grad. c.grad and d.grad will be Tensors holding # the gradient of the loss with respect to a, b, c, d respectively. loss.backward() # Manually update weights using gradient descent. Wrap in torch.no_grad() # because weights have requires_grad=True, but we don't need to track this # in autograd. with torch.no_grad(): a -= learning_rate * a.grad b -= learning_rate * b.grad c -= learning_rate * c.grad d -= learning_rate * d.grad # Manually zero the gradients after updating weights a.grad = None b.grad = None c.grad = None d.grad = None print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')
PyTorch:定义新的 Autograd 函数
这里我们准备一个三阶多项式,通过最小化平方欧几里得距离来训练,并预测函数
y = sin(x)
在-pi
到pi
上的值。
这里我们不将多项式写为y = a + bx + cx^2 + dx^3
,而是将多项式写为y = a + bP_3(c + dx)
,其中P_3(x) = 1/2 (5x ^ 3 - 3x)
是三次勒让德多项式。
此实现使用了 PyTorch 张量(tensor)运算来实现前向传播,并使用 PyTorch Autograd 来计算梯度。
在此实现中,我们实现了自己的自定义 Autograd 函数来执行P'_3(x)
。 从数学定义上讲,P'_3(x) = 3/2 (5x ^ 2 - 1)
:
import torch import math class LegendrePolynomial3(torch.autograd.Function): """ We can implement our own custom autograd Functions by subclassing torch.autograd.Function and implementing the forward and backward passes which operate on Tensors. """ @staticmethod def forward(ctx, input): """ In the forward pass we receive a Tensor containing the input and return a Tensor containing the output. ctx is a context object that can be used to stash information for backward computation. You can cache arbitrary objects for use in the backward pass using the ctx.save_for_backward method. """ ctx.save_for_backward(input) return 0.5 * (5 * input ** 3 - 3 * input) @staticmethod def backward(ctx, grad_output): """ In the backward pass we receive a Tensor containing the gradient of the loss with respect to the output, and we need to compute the gradient of the loss with respect to the input. """ input, = ctx.saved_tensors return grad_output * 1.5 * (5 * input ** 2 - 1) dtype = torch.float device = torch.device("cpu") # device = torch.device("cuda:0") # Uncomment this to run on GPU # Create Tensors to hold input and outputs. # By default, requires_grad=False, which indicates that we do not need to # compute gradients with respect to these Tensors during the backward pass. x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype) y = torch.sin(x) # Create random Tensors for weights. For this example, we need # 4 weights: y = a + b * P3(c + d * x), these weights need to be initialized # not too far from the correct result to ensure convergence. # Setting requires_grad=True indicates that we want to compute gradients with # respect to these Tensors during the backward pass. a = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True) b = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True) c = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True) d = torch.full((), 0.3, device=device, dtype=dtype, requires_grad=True) learning_rate = 5e-6 for t in range(2000): # To apply our Function, we use Function.apply method. We alias this as 'P3'. P3 = LegendrePolynomial3.apply # Forward pass: compute predicted y using operations; we compute # P3 using our custom autograd operation. y_pred = a + b * P3(c + d * x) # Compute and print loss loss = (y_pred - y).pow(2).sum() if t % 100 == 99: print(t, loss.item()) # Use autograd to compute the backward pass. loss.backward() # Update weights using gradient descent with torch.no_grad(): a -= learning_rate * a.grad b -= learning_rate * b.grad c -= learning_rate * c.grad d -= learning_rate * d.grad # Manually zero the gradients after updating weights a.grad = None b.grad = None c.grad = None d.grad = None print(f'Result: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)')