First, create Tensors x1, w1, and w2, and define the model:
- x2 = x1 * w1
- y = x2 * w2
- L = Y - y

where Y is the target output and L the loss. As long as their dimensions are compatible, none of these Tensors needs its shape specified yet. In fact, this model can be viewed as a very simple neural network, as shown in the figure below.
In PyTorch, the complete code describing this model is:
import torch
x = torch.ones(2,2, dtype=torch.float)
x1 = 2 * x
x1.requires_grad_(True)
w1 = 5 * x
w1.requires_grad_(True)
print('x =', x)
print("x1 =", x1)
print("w1 =", w1)
x2 = x1 * w1
w2 = 6 * x
w2.requires_grad_(True)
print("x2 =", x2)
print("w2 =", w2)
y = x2 * w2
Y = 10 * x
print("y =", y)
print("Y =", Y)
L = Y - y
print("L =", L)
# L is a 2x2 tensor, not a scalar, so backward() needs an explicit gradient argument
L.backward(torch.ones(2,2, dtype=torch.float32))
print("x1.grad =", x1.grad)
print("w1.grad =", w1.grad)
print("w2.grad =", w2.grad)
The model's mathematical expressions and the corresponding gradient expressions are:
$$
\begin{array}{l}
x_{2}=x_{1} \cdot w_{1} \\
y=x_{2} \cdot w_{2} \\
L=Y-y \\
\frac{\partial L}{\partial x_{1}}=\frac{\partial L}{\partial y} \frac{\partial y}{\partial x_{2}} \frac{\partial x_{2}}{\partial x_{1}} \\
\frac{\partial L}{\partial w_{1}}=\frac{\partial L}{\partial y} \frac{\partial y}{\partial x_{2}} \frac{\partial x_{2}}{\partial w_{1}} \\
\frac{\partial L}{\partial w_{2}}=\frac{\partial L}{\partial y} \frac{\partial y}{\partial w_{2}} \\
\frac{\partial L}{\partial y}=-1
\end{array}
$$
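Substituting the elementwise values from the code above (x1 = 2, w1 = 5, w2 = 6, x2 = 10), the chain rule gives the gradients autograd should report:

$$
\begin{array}{l}
\frac{\partial L}{\partial x_{1}} = -1 \cdot w_{2} \cdot w_{1} = -6 \cdot 5 = -30 \\
\frac{\partial L}{\partial w_{1}} = -1 \cdot w_{2} \cdot x_{1} = -6 \cdot 2 = -12 \\
\frac{\partial L}{\partial w_{2}} = -1 \cdot x_{2} = -10
\end{array}
$$

These match the gradient values printed at the end of this post.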
import torch
x = torch.ones(2, 2, dtype=torch.float, requires_grad=True)  # requires_grad set at creation this time
x1 = 2 * x
# x1.requires_grad_(True)
w1 = 5 * x
# w1.requires_grad_(True)
print('x =', x)
print("x1 =", x1)
print("w1 =", w1)
x2 = x1 * w1
w2 = 6 * x
# w2.requires_grad_(True)
print("x2 =", x2)
print("w2 =", w2)
y = x2 * w2
Y = 10 * x
print("y =", y)
print("Y =", Y)
L = Y - y
print("L =", L)
L.backward(torch.ones(2,2, dtype=torch.float32))
print("x1.grad =", x1.grad)
print("w1.grad =", w1.grad)
print("w2.grad =", w2.grad)
If requires_grad=True is set when the tensor x is created (with the .requires_grad_(True) calls commented out, as in the code above), the following happens:
x = tensor([[1., 1.],
[1., 1.]], requires_grad=True)
x1 = tensor([[2., 2.],
[2., 2.]], grad_fn=<MulBackward0>)
w1 = tensor([[5., 5.],
[5., 5.]], grad_fn=<MulBackward0>)
x2 = tensor([[10., 10.],
[10., 10.]], grad_fn=<MulBackward0>)
w2 = tensor([[6., 6.],
[6., 6.]], grad_fn=<MulBackward0>)
y = tensor([[60., 60.],
[60., 60.]], grad_fn=<MulBackward0>)
Y = tensor([[10., 10.],
[10., 10.]], grad_fn=<MulBackward0>)
L = tensor([[-50., -50.],
[-50., -50.]], grad_fn=<SubBackward0>)
x1.grad = None
w1.grad = None
w2.grad = None
The computed gradients are, surprisingly, None.
This is because autograd only populates the .grad attribute of leaf tensors with requires_grad=True. In the output above, x1, w1, and w2 all print with grad_fn=<MulBackward0> instead of requires_grad=True: since x itself requires grad, they are intermediate (non-leaf) results of a multiplication, and their gradients are not retained by default.
The is_leaf attribute shows whether a tensor is a leaf node:
print(x1.is_leaf) # False
print(w1.is_leaf) # False
print(w2.is_leaf) # False
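Alternatively (an option not used in this post), PyTorch provides Tensor.retain_grad(), which tells autograd to keep the gradient of a non-leaf tensor. With it, the requires_grad=True-at-creation version also works; a minimal sketch:

import torch
# Minimal sketch: x requires grad at creation, so x1, w1, w2 are
# non-leaf tensors; retain_grad() makes autograd keep their .grad.
x = torch.ones(2, 2, dtype=torch.float, requires_grad=True)
x1 = 2 * x
w1 = 5 * x
w2 = 6 * x
for t in (x1, w1, w2):
    t.retain_grad()
x2 = x1 * w1
y = x2 * w2
L = 10 * x - y
L.backward(torch.ones(2,2, dtype=torch.float32))
print(x1.grad)  # tensor([[-30., -30.], [-30., -30.]]) instead of None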
The fix used here is instead to create x without requires_grad, so that x1, w1, and w2 have no grad_fn and are themselves leaf tensors, and then call .requires_grad_(True) on each of them so autograd tracks and retains their gradients:
import torch
x = torch.ones(2,2, dtype=torch.float)  # no requires_grad here, so tensors derived from x are leaves
x1 = 2 * x
x1.requires_grad_(True)  # x1 is a leaf: backward() will populate x1.grad
w1 = 5 * x
w1.requires_grad_(True)
print('x =', x)
print("x1 =", x1)
print("w1 =", w1)
x2 = x1 * w1
w2 = 6 * x
w2.requires_grad_(True)
print("x2 =", x2)
print("w2 =", w2)
y = x2 * w2
Y = 10 * x
print("y =", y)
print("Y =", Y)
L = Y - y
print("L =", L)
L.backward(torch.ones(2,2, dtype=torch.float32))
print("x1.grad =", x1.grad)
print("w1.grad =", w1.grad)
print("w2.grad =", w2.grad)
Now the gradients come out correctly:
x = tensor([[1., 1.],
[1., 1.]])
x1 = tensor([[2., 2.],
[2., 2.]], requires_grad=True)
w1 = tensor([[5., 5.],
[5., 5.]], requires_grad=True)
x2 = tensor([[10., 10.],
[10., 10.]], grad_fn=<MulBackward0>)
w2 = tensor([[6., 6.],
[6., 6.]], requires_grad=True)
y = tensor([[60., 60.],
[60., 60.]], grad_fn=<MulBackward0>)
Y = tensor([[10., 10.],
[10., 10.]])
L = tensor([[-50., -50.],
[-50., -50.]], grad_fn=<SubBackward0>)
x1.grad = tensor([[-30., -30.],
[-30., -30.]])
w1.grad = tensor([[-12., -12.],
[-12., -12.]])
w2.grad = tensor([[-10., -10.],
[-10., -10.]])
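As a final sanity check (an addition to the original), the autograd results can be compared programmatically against the hand-derived formulas ∂L/∂x1 = -w2·w1, ∂L/∂w1 = -w2·x1, and ∂L/∂w2 = -x2:

import torch
# Rebuild the working version of the model and verify autograd's output
# against the analytic gradients derived earlier.
x = torch.ones(2,2, dtype=torch.float)
x1 = (2 * x).requires_grad_(True)
w1 = (5 * x).requires_grad_(True)
w2 = (6 * x).requires_grad_(True)
x2 = x1 * w1
y = x2 * w2
L = 10 * x - y
L.backward(torch.ones(2,2, dtype=torch.float32))
# detach() drops the analytic expressions from the autograd graph
assert torch.allclose(x1.grad, (-w2 * w1).detach())  # -30
assert torch.allclose(w1.grad, (-w2 * x1).detach())  # -12
assert torch.allclose(w2.grad, (-x2).detach())       # -10
print("autograd matches the hand-derived gradients")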