参考资料:https://pytorch.org/tutorials/beginner/pytorch_with_examples.html
NumPy 是一个功能强大的数值计算库,但它无法利用 GPU 加速数值计算,因此仅靠 NumPy 不足以满足现代深度学习的需求。
Pytorch的一个基础概念:张量
在概念上,PyTorch 的张量(Tensor)等同于 NumPy 的数组(ndarray):张量可以看作 n 维数组,PyTorch 提供了许多操作张量的函数。此外,张量还可以追踪计算图和梯度,并且可以利用 GPU 加速数值计算。
Numpy 实现前向和反向传播
import numpy as np
# Two-layer fully connected network (linear -> ReLU -> linear) trained with
# plain NumPy; both the forward and the backward pass are written by hand.
#
# N is the batch size, D_in the input dimension,
# H the hidden-layer dimension and D_out the output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random input and target data.
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Randomly initialised weights of the two linear layers.
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute the prediction and the squared-error loss.
    h = x.dot(w1)
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2)
    loss = np.square(y_pred - y).sum()
    print(t, loss)

    # Backward pass: gradients of the loss with respect to w1 and w2.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    # ReLU passes no gradient where its input was negative.
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)

    # Gradient-descent weight update.
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
使用 PyTorch 张量(Tensor)实现前向和反向传播
import torch
# Two-layer fully connected network (linear -> ReLU -> linear) trained with
# PyTorch tensors; the backward pass is still derived and coded by hand.
dtype = torch.float
device = torch.device("cpu")

# N is the batch size, D_in the input dimension,
# H the hidden-layer dimension and D_out the output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random input and target data.
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# Randomly initialised weights of the two linear layers.
w1 = torch.randn(D_in, H, device=device, dtype=dtype)
w2 = torch.randn(H, D_out, device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(500):
    # Forward pass.
    h = x.mm(w1)
    # clamp(min=0) replaces every value below 0 with 0 and returns a new
    # tensor, i.e. it applies ReLU.
    h_relu = h.clamp(min=0)
    y_pred = h_relu.mm(w2)
    # .item() converts the scalar loss tensor to a plain Python number.
    loss = (y_pred - y).pow(2).sum().item()
    print(t, loss)

    # Backward pass: gradients of the loss with respect to w1 and w2.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    # ReLU passes no gradient where its input was negative.
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)

    # Gradient-descent weight update.
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
torch之mm(),matmul(),mul()函数
import torch
# Compare torch.mul (element-wise product) with torch.mm / torch.matmul
# (matrix product) on small random tensors.
a = torch.randn(1, 2)
b = torch.randn(1, 2)
c = torch.randn(2, 3)

# Element-wise product of two 1x2 tensors -> 1x2 tensor.
elementwise = torch.mul(a, b)
print(elementwise)

# Matrix product of a 1x2 and a 2x3 tensor -> 1x3 tensor.
mm_product = torch.mm(a, c)
print(mm_product)

# matmul performs the same matrix product for 2-D inputs -> 1x3 tensor.
matmul_product = torch.matmul(a, c)
print(matmul_product)

# torch.mm(a, b) and torch.matmul(a, b) would both raise an error:
# a 1x2 tensor cannot be matrix-multiplied with another 1x2 tensor.
torch自动求导
import torch
# Two-layer fully connected network (linear -> ReLU -> linear) trained with
# PyTorch autograd: only the forward pass is written out, gradients come from
# loss.backward().
dtype = torch.float
device = torch.device("cpu")

# N is the batch size, D_in the input dimension,
# H the hidden-layer dimension and D_out the output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random input and target data.
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# Note requires_grad=True: autograd must track operations on the weights so
# that backward() can populate w1.grad and w2.grad.
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6
for t in range(500):
    # Forward pass; intermediates need not be kept because the backward
    # pass is no longer written by hand.
    y_pred = x.mm(w1).clamp(min=0).mm(w2)
    loss = (y_pred - y).pow(2).sum()
    print(t, loss.item())

    # Autograd: back-propagate the loss to fill w1.grad and w2.grad.
    loss.backward()

    # Manual weight update, done under no_grad so that the in-place update
    # itself is not tracked by autograd.
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad

        # Zero the gradients; backward() accumulates into .grad otherwise.
        w1.grad.zero_()
        w2.grad.zero_()