Building a simple forward and backward pass with NumPy
import numpy as np

N, D_in, H, D_out = 64, 1000, 100, 10
x = np.random.randn(N, D_in)    # (64, 1000)
y = np.random.randn(N, D_out)   # (64, 10)
w1 = np.random.randn(D_in, H)   # (1000, 100)
w2 = np.random.randn(H, D_out)  # (100, 10)
learning_rate = 1e-6

for t in range(2):
    # Forward pass: compute predicted y
    h = x.dot(w1)               # (64, 100)
    h_relu = np.maximum(h, 0)   # (64, 100), elementwise ReLU
    y_pred = h_relu.dot(w2)     # (64, 10)

    loss = np.square(y_pred - y).sum()  # sum() adds up all elements

    # Backprop to compute gradients of w1 and w2 with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()         # (64, 100)
    grad_h[h < 0] = 0                   # zero the gradient wherever h was negative -> ReLU backward
    grad_w1 = x.T.dot(grad_h)           # .T is the transpose; result is (1000, 100)

    # Update weights
    w1 -= learning_rate * grad_w1       # (1000, 100)
    w2 -= learning_rate * grad_w2
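A quick way to convince yourself that the hand-derived gradients above are right is a finite-difference check. The sketch below compares the analytic gradient of w2 against a central-difference estimate on a small network; the helper numerical_grad, the reduced sizes, and the chosen indices are illustrative assumptions, not part of the original example.

import numpy as np

def numerical_grad(f, w, i, j, eps=1e-5):
    # Central-difference approximation of d f / d w[i, j]
    old = w[i, j]
    w[i, j] = old + eps
    f_plus = f()
    w[i, j] = old - eps
    f_minus = f()
    w[i, j] = old
    return (f_plus - f_minus) / (2 * eps)

np.random.seed(0)
N, D_in, H, D_out = 4, 5, 3, 2
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

def loss_fn():
    h_relu = np.maximum(x.dot(w1), 0)
    return np.square(h_relu.dot(w2) - y).sum()

# Analytic gradient of the loss with respect to w2 (same formula as above)
h_relu = np.maximum(x.dot(w1), 0)
grad_w2 = h_relu.T.dot(2.0 * (h_relu.dot(w2) - y))

# The analytic and numerical values should agree to several decimal places
for (i, j) in [(0, 0), (2, 1)]:
    approx = numerical_grad(loss_fn, w2, i, j)
    print(i, j, grad_w2[i, j], approx)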
Building a simple forward and backward pass with PyTorch Tensors
import torch

dtype = torch.FloatTensor
# dtype = torch.cuda.FloatTensor  # uncomment to run on GPU

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

x = torch.randn(N, D_in).type(dtype)
y = torch.randn(N, D_out).type(dtype)

# Randomly initialize weights
w1 = torch.randn(D_in, H).type(dtype)
w2 = torch.randn(H, D_out).type(dtype)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y
    h = x.mm(w1)                # matrix multiply; compare with .dot() in the NumPy version
    h_relu = h.clamp(min=0)
    y_pred = h_relu.mm(w2)

    loss = (y_pred - y).pow(2).sum()

    # Backprop to compute gradients of w1 and w2 with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)

    # Update weights using gradient descent
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
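The code above targets the old dtype style (torch.FloatTensor / torch.cuda.FloatTensor). In current PyTorch (roughly 0.4 and later), dtype and device are usually passed to the factory functions instead. A minimal sketch of that setup, assuming the common device-selection idiom; the rest of the manual forward/backward loop carries over unchanged.

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.float32

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)
w1 = torch.randn(D_in, H, device=device, dtype=dtype)
w2 = torch.randn(H, D_out, device=device, dtype=dtype)

# x.mm(w1), h.clamp(min=0), .t(), .clone() etc. work the same on these tensors,
# so the training loop above runs as-is on CPU or GPU.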
Implementing forward and backward with Variables
# Use PyTorch Variables and autograd to implement our two-layer network;
# now we no longer need to manually implement the backward pass through the network.

import torch
from torch.autograd import Variable

dtype = torch.FloatTensor
N, D_in, H, D_out = 64, 1000, 100, 10

# Setting requires_grad=False indicates that we do not need to compute
# gradients with respect to these Variables during the backward pass.
x = Variable(torch.randn(N, D_in).type(dtype), requires_grad=False)
y = Variable(torch.randn(N, D_out).type(dtype), requires_grad=False)

# Setting requires_grad=True indicates that we want to compute gradients
# with respect to these Variables during the backward pass.
w1 = Variable(torch.randn(D_in, H).type(dtype), requires_grad=True)
w2 = Variable(torch.randn(H, D_out).type(dtype), requires_grad=True)

learning_rate = 1e-6
for t in range(2):
    # Forward pass: we do not need to keep references to intermediate values
    # since we are not implementing the backward pass by hand
    y_pred = x.mm(w1).clamp(min=0).mm(w2)

    # Now loss is a Variable of shape (1,) and loss.data is a Tensor of shape (1,);
    # loss.data[0] is a scalar value holding the loss.
    loss = (y_pred - y).pow(2).sum()
    # print(loss)        # [torch.FloatTensor of size 1]
    # print(loss.size()) # torch.Size([1])
    # print(loss.data)   # [torch.FloatTensor of size 1]
    print(loss.data[0])

    # Autograd computes the gradients of loss with respect to every Variable
    # created with requires_grad=True
    loss.backward()

    # Update weights; operate on .data so the update itself is not tracked by autograd
    w1.data -= learning_rate * w1.grad.data
    w2.data -= learning_rate * w2.grad.data

    # Manually zero the gradients after the update
    w1.grad.data.zero_()
    w2.grad.data.zero_()
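Variable has since been merged into Tensor. As a reference, here is a minimal sketch of the same autograd loop in current PyTorch: requires_grad is passed at creation time, the scalar loss is read with loss.item() instead of loss.data[0], and the parameter update runs under torch.no_grad().

import torch

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)

learning_rate = 1e-6
for t in range(2):
    y_pred = x.mm(w1).clamp(min=0).mm(w2)
    loss = (y_pred - y).pow(2).sum()
    print(loss.item())          # replaces loss.data[0]

    loss.backward()             # autograd fills in w1.grad and w2.grad

    with torch.no_grad():       # keep the update out of the autograd graph
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad
        w1.grad.zero_()
        w2.grad.zero_()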
Implementing the ReLU function as a custom autograd Function (Variable API)
import torch
from torch.autograd import Variable

class MyReLU(torch.autograd.Function):
    def forward(self, input):
        # Save the input so backward can tell which elements were clipped
        self.save_for_backward(input)
        return input.clamp(min=0)

    def backward(self, grad_output):
        input, = self.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0   # zero the gradient where the input was negative
        return grad_input

dtype = torch.FloatTensor
N, D_in, H, D_out = 64, 1000, 100, 10

x = Variable(torch.randn(N, D_in).type(dtype), requires_grad=False)
y = Variable(torch.randn(N, D_out).type(dtype), requires_grad=False)

w1 = Variable(torch.randn(D_in, H).type(dtype), requires_grad=True)
w2 = Variable(torch.randn(H, D_out).type(dtype), requires_grad=True)

learning_rate = 1e-6
for t in range(2):
    # Construct an instance of our custom Function (legacy, non-static style)
    relu = MyReLU()

    # Forward pass
    y_pred = relu(x.mm(w1)).mm(w2)

    loss = (y_pred - y).pow(2).sum()
    loss.backward()

    w1.data -= learning_rate * w1.grad.data
    w2.data -= learning_rate * w2.grad.data

    w1.grad.data.zero_()
    w2.grad.data.zero_()
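The instance-method style above is the legacy autograd.Function API. In current PyTorch, custom Functions are written with static forward/backward methods that take a ctx object and are invoked through .apply. A minimal sketch of the same custom ReLU in that style; the surrounding loop mirrors the Variable version above.

import torch

class MyReLU(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        return input.clamp(min=0)

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0   # zero the gradient where the input was negative
        return grad_input

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)

learning_rate = 1e-6
for t in range(2):
    # Call the Function through .apply instead of instantiating it
    y_pred = MyReLU.apply(x.mm(w1)).mm(w2)

    loss = (y_pred - y).pow(2).sum()
    loss.backward()

    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad
        w1.grad.zero_()
        w2.grad.zero_()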