pytorch求梯度, MSE损失函数用sum和mean的区别
1. z.mean()取均值操作,梯度计算如下
# Demo 1: gradient when the loss is reduced with mean().
import torch
x = torch.ones(2, 2, requires_grad=True)
print(x)
# tensor([[1., 1.],
# [1., 1.]], requires_grad=True)
y = x + 2
print(y)
# tensor([[3., 3.],
# [3., 3.]], grad_fn=<AddBackward0>)
print(y.grad_fn)
# <AddBackward0 object at 0x7f268025e080>
z = y * y * 3
print(z)
# tensor([[27., 27.],
# [27., 27.]], grad_fn=<MulBackward0>)
out = z.mean()
print(out)
# tensor(27., grad_fn=<MeanBackward0>)
out.backward()  # backpropagate; x.grad is only populated after this call
print(x.grad)
# Each entry is d(out)/dx_i = (1/4) * 6 * (x_i + 2) = 4.5 at x_i = 1.
# tensor([[4.5000, 4.5000],
# [4.5000, 4.5000]])
解析:
z=3*y*y=3*(x+2)(x+2)
out = z.mean() = (1/4) * Σ_i z_i = (1/4) * Σ_i 3(x_i + 2)²
out 对各个 x_i 求导: ∂out/∂x_i = (1/4) · 6 · (x_i + 2) = (3/2)(x_i + 2)
将 x_i = 1 代入,得到 (3/2) · 3 = 4.5,即结果
# tensor([[4.5000, 4.5000],
# [4.5000, 4.5000]])
2. z.sum()取求和操作,梯度计算如下
# Demo 2: gradient when the loss is reduced with sum().
# Unlike mean(), sum() does not divide by the element count, so every
# gradient entry is 4x larger than in Demo 1 (18 vs 4.5 for a 2x2 input).
import torch
x1 = torch.ones(2, 2, requires_grad=True)
print(x1)
# tensor([[1., 1.],
# [1., 1.]], requires_grad=True)
y1 = x1 + 2
print(y1)
# tensor([[3., 3.],
# [3., 3.]], grad_fn=<AddBackward0>)
print(y1.grad_fn)
# <AddBackward0 object at 0x7f268025e080>
z1 = y1 * y1 * 3
print(z1)
# tensor([[27., 27.],
# [27., 27.]], grad_fn=<MulBackward0>)
out2 = z1.sum()
print(out2)
# tensor(108., grad_fn=<SumBackward0>)  # fixed: sum of four 27s is 108, not 27/MeanBackward0
out2.backward()  # backpropagate; x1.grad is only populated after this call
print(x1.grad)
# Each entry is d(out2)/dx1_i = 6 * (x1_i + 2) = 18 at x1_i = 1.
# tensor([[18., 18.],
# [18., 18.]])
解析:
z1=3*y1*y1=3*(x1+2)(x1+2)
out2 = z1.sum() = Σ_i 3(x1_i + 2)²
out2 对各个 x1_i 求导: ∂out2/∂x1_i = 6 · (x1_i + 2)
将 x1_i = 1 代入,得到 6 · 3 = 18,即结果
# tensor([[18., 18.],
# [18., 18.]])