PyTorch Automatic Differentiation
Outline
- The requires_grad attribute
- Backpropagation with tensor.backward()
- The grad attribute
- A worked example of autograd backpropagation
- Detaching a tensor with detach()
- Caveats of detach()
- with torch.no_grad()
- Using torch.autograd.grad in PyTorch to compute gradients
The requires_grad attribute
torch.Tensor is the core class of the package. If you set its requires_grad attribute to True, PyTorch starts tracking every operation performed on that tensor.
import torch
x=torch.arange(9).view(3,3)
print(x.requires_grad)
#False
x=torch.rand(3,3,requires_grad=True)
print(x)
#tensor([[0.3431, 0.2753, 0.1635],
# [0.5510, 0.1404, 0.2791],
# [0.6795, 0.4329, 0.5543]], requires_grad=True)
tensor.backward() backpropagation & the grad attribute
To compute derivatives, call Tensor.backward().
After the backward pass, the gradients are accumulated into the grad attribute of the leaf tensors that have requires_grad=True; intermediate (non-leaf) results such as y below do not keep a grad by default, which is why y.grad prints None.
w=torch.ones(3,3,requires_grad=True)
y=torch.sum(torch.mm(w,x))
print(y)
#tensor(15.9818, grad_fn=<SumBackward0>)
y.backward()
print(y.grad)
#None
print(x.grad)
#tensor([[3., 3., 3.],
# [3., 3., 3.],
# [3., 3., 3.]])
print(w.grad)
#tensor([[2.0789, 1.3587, 1.3549],
# [2.0789, 1.3587, 1.3549],
# [2.0789, 1.3587, 1.3549]])
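If you do need the gradient of a non-leaf tensor such as y, you can ask autograd to keep it by calling retain_grad() before the backward pass. A minimal sketch of this (a separate illustration, not part of the original example):
import torch
x=torch.rand(3,3,requires_grad=True)
w=torch.ones(3,3,requires_grad=True)
y=torch.sum(torch.mm(w,x))
y.retain_grad()   # keep the gradient of this non-leaf tensor
y.backward()
print(y.grad)     # tensor(1.), i.e. dy/dy
print(x.grad)     # all 3s, as in the example above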
Example
import torch
#Create a tensor and set requires_grad=True to track computations on it
x=torch.ones(2,2,requires_grad=True)
print(x)
#tensor([[1., 1.],
# [1., 1.]], requires_grad=True)
#Apply an operation to the tensor
y=x+2
print(y)
#tensor([[3., 3.],
# [3., 3.]], grad_fn=<AddBackward0>)
#y was created as the result of an operation, so it has a grad_fn
print(y.grad_fn)
#<AddBackward0 object at 0x00000167AC585C48>
#Do more operations on y
z=y*y*3
out=z.mean()
print(z,out)
#tensor([[27., 27.],
# [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward0>)
#Now run the backward pass
out.backward()
#Print the gradients
print(x.grad)
#tensor([[4.5000, 4.5000],
# [4.5000, 4.5000]])
Why is the gradient 4.5?
y = x + 2
z = y * y * 3 = 3*(x+2)^2
out = z.mean() = (1/4) * sum of 3*(x_i+2)^2
so d(out)/dx_i = (3/2)*(x_i+2)
substituting x_i = 1 gives 9/2 = 4.5
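As a quick check, the analytic gradient (3/2)*(x+2) can be compared with what autograd computes; a small verification sketch (illustrative only):
import torch
x=torch.ones(2,2,requires_grad=True)
out=(3*(x+2)**2).mean()
out.backward()
print(x.grad)                # tensor([[4.5000, 4.5000], [4.5000, 4.5000]])
print(1.5*(x.detach()+2))    # the analytic gradient, same values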
Detaching a tensor with detach()
To stop a tensor's history from being tracked, call .detach(). This separates the tensor from its computation history and prevents future computations on it from being tracked.
import torch
x=torch.rand(3,3,requires_grad=True)
w=torch.ones(3,3,requires_grad=True)
print(x)
#tensor([[0.8293, 0.1627, 0.1203],
# [0.5815, 0.8387, 0.7793],
# [0.0707, 0.4137, 0.9285]], requires_grad=True)
print(w)
#tensor([[1., 1., 1.],
# [1., 1., 1.],
# [1., 1., 1.]], requires_grad=True)
yy=torch.mm(x,w)
print(yy)
#tensor([[1.1122, 1.1122, 1.1122],
# [2.1995, 2.1995, 2.1995],
# [1.4129, 1.4129, 1.4129]], grad_fn=<MmBackward>)
detached_yy=yy.detach()   # shares data with yy, but requires_grad=False and no grad_fn
y=torch.mean(yy)
y.backward()
print(yy.grad)
#None
print(w.grad)
#tensor([[0.1752, 0.1752, 0.1752],
# [0.1699, 0.1699, 0.1699],
# [0.1551, 0.1551, 0.1551]])
print(x.grad)
#tensor([[0.3333, 0.3333, 0.3333],
# [0.3333, 0.3333, 0.3333],
# [0.3333, 0.3333, 0.3333]])
Notes:
1. If you use detach() to split off a tensor but do not modify it, backward() is not affected.
import torch
a = torch.tensor([0.8, 0.7, 0.3], requires_grad=True)
print(a.grad)
out = a.sigmoid()
print(out)
# After detach(), c has requires_grad=False
c = out.detach()
print(c)
# Backpropagate from the original (non-detached) output
out.sum().backward()
print(a.grad)
#None
#tensor([0.6900, 0.6682, 0.5744], grad_fn=<SigmoidBackward>)
#tensor([0.6900, 0.6682, 0.5744])
#tensor([0.2139, 0.2217, 0.2445])
2. If you use detach() to split off a tensor and then backpropagate from the detached tensor itself, backward() fails with an error.
import torch
a = torch.tensor([0.8, 0.7, 0.3], requires_grad=True)
print(a.grad)
out = a.sigmoid()
print(out)
#After detach(), c has requires_grad=False
c = out.detach()
print(c)
#Backpropagate from the detached tensor c
c.sum().backward()
print(a.grad)
''' Output
None
tensor([0.6900, 0.6682, 0.5744], grad_fn=<SigmoidBackward>)
tensor([0.6900, 0.6682, 0.5744])
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
'''
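A third situation is worth noting, illustrated here with a small sketch (not from the original notes): the detached tensor shares storage with the original, so modifying it in place also overwrites the original output, and a later backward() through that output raises a RuntimeError because a value needed for the gradient computation has been changed.
import torch
a = torch.tensor([0.8, 0.7, 0.3], requires_grad=True)
out = a.sigmoid()
c = out.detach()
c.zero_()            # in-place change; out's data is overwritten as well
print(out)           # tensor([0., 0., 0.], grad_fn=<SigmoidBackward>)
out.sum().backward() # RuntimeError: a variable needed for gradient computation
                     # has been modified by an inplace operation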
with torch.no_grad()
A code block wrapped in with torch.no_grad(): is executed without gradient tracking, which is useful for evaluation and inference.
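The notes do not include an example for this, so here is a minimal sketch (illustrative only):
import torch
x=torch.ones(2,2,requires_grad=True)
print((x**2).requires_grad)   # True, the operation is tracked as usual
with torch.no_grad():
    y=x**2
print(y.requires_grad)        # False, no graph is built inside the block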
Using torch.autograd.grad in PyTorch to compute gradients
import torch
x = torch.zeros(3, 4)
# fill in the values first, then enable gradient tracking; assigning into a
# leaf tensor that already requires grad would raise an in-place-operation error
for i in range(3):
    for j in range(4):
        x[i][j] = i + j
x.requires_grad_(True)
y = x ** 2
print(x)
#tensor([[0., 1., 2., 3.],
#        [1., 2., 3., 4.],
#        [2., 3., 4., 5.]], requires_grad=True)
print(y)
#tensor([[ 0., 1., 4., 9.],
# [ 1., 4., 9., 16.],
# [ 4., 9., 16., 25.]], grad_fn=<PowBackward0>)
weight=torch.ones(y.size())
print(weight)
dxdy=torch.autograd.grad(outputs=y,
                         inputs=x,
                         grad_outputs=weight,
                         retain_graph=True,
                         create_graph=True,
                         only_inputs=True)
print(dxdy)
print(dxdy[0])
#Why dxdy[0]? torch.autograd.grad returns a tuple; to compute the second derivative
#we differentiate its first element, which is the gradient tensor itself
dy2dx2=torch.autograd.grad(outputs=dxdy[0],
                           inputs=x,
                           grad_outputs=weight,
                           retain_graph=True,
                           create_graph=True,
                           only_inputs=True)
print(dy2dx2[0])
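Since y = x ** 2 and grad_outputs is a tensor of ones, the first call should return 2*x and the second a tensor filled with 2s. The key flag is create_graph=True in the first call: it builds a graph for the gradient itself, which is what allows it to be differentiated a second time.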