PyTorch Automatic Differentiation
Outline
- The requires_grad attribute
- Backpropagation with tensor.backward()
- The grad attribute
- A worked example of autograd backpropagation
- Detaching a tensor with detach()
- Caveats of detach()
- with torch.no_grad()
- Using torch.autograd.grad in PyTorch to compute gradients
The requires_grad attribute
torch.Tensor is the core class of the package. If you set its requires_grad attribute to True, PyTorch starts tracking every operation performed on that tensor.
import torch
x=torch.arange(9).view(3,3)
print(x.requires_grad)
#False
x=torch.rand(3,3,requires_grad=True)
print(x)
#tensor([[0.3431, 0.2753, 0.1635],
# [0.5510, 0.1404, 0.2791],
# [0.6795, 0.4329, 0.5543]], requires_grad=True)
tensor.backward() backpropagation & the grad attribute
To compute derivatives, call Tensor.backward().
After the backward pass, the gradients are accumulated into the grad attribute of the leaf tensors that have requires_grad=True; intermediate (non-leaf) results such as y below do not keep a grad by default, which is why y.grad prints None.
w=torch.ones(3,3,requires_grad=True)
y=torch.sum(torch.mm(w,x))
print(y)
#tensor(15.9818, grad_fn=<SumBackward0>)
y.backward()
print(y.grad)
#None
print(x.grad)
#tensor([[3., 3., 3.],
# [3., 3., 3.],
# [3., 3., 3.]])
print(w.grad)
#tensor([[2.0789, 1.3587, 1.3549],
# [2.0789, 1.3587, 1.3549],
# [2.0789, 1.3587, 1.3549]])
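If you do need the gradient of a non-leaf tensor such as y, you can ask autograd to keep it by calling retain_grad() before the backward pass. A minimal sketch of this (a separate illustration, not part of the original example):
import torch
x=torch.rand(3,3,requires_grad=True)
w=torch.ones(3,3,requires_grad=True)
y=torch.sum(torch.mm(w,x))
y.retain_grad()   # keep the gradient of this non-leaf tensor
y.backward()
print(y.grad)     # tensor(1.), i.e. dy/dy
print(x.grad)     # all 3s, as in the example above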
Example
import torch
#Create a tensor and set requires_grad=True to track computations on it
x=torch.ones(2,2,requires_grad=True)
print(x)
#tensor([[1., 1.],
# [1., 1.]], requires_grad=True)
#Apply an operation to the tensor
y=x+2
print(y)
#tensor([[3., 3.],
# [3., 3.]], grad_fn=<AddBackward0>)
#y was created as the result of an operation, so it has a grad_fn
print(y.grad_fn)
#<AddBackward0 object at 0x00000167AC585C48>
#Do more operations on y
z=y*y*3
out=z.mean()
print(z,out)
#tensor([[27., 27.],
# [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward0>)
#Now run the backward pass
out.backward()
#Print the gradients
print(x.grad)
#tensor([[4.5000, 4.5000],
# [4.5000, 4.5000]])
Why is the gradient 4.5?
y = x + 2
z = y * y * 3 = 3*(x+2)^2
out = z.mean() = (1/4) * sum of 3*(x_i+2)^2
so d(out)/dx_i = (3/2)*(x_i+2)
substituting x_i = 1 gives 9/2 = 4.5
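As a quick check, the analytic gradient (3/2)*(x+2) can be compared with what autograd computes; a small verification sketch (illustrative only):
import torch
x=torch.ones(2,2,requires_grad=True)
out=(3*(x+2)**2).mean()
out.backward()
print(x.grad)                # tensor([[4.5000, 4.5000], [4.5000, 4.5000]])
print(1.5*(x.detach()+2))    # the analytic gradient, same values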
Detaching a tensor with detach()
To stop a tensor's history from being tracked, call .detach(). This separates the tensor from its computation history and prevents future computations on it from being tracked.
import torch
x=torch.rand(3,3,requires_grad=True)
w=torch.ones(3,3,requires_grad=True)
print(x)
#tensor([[0.8293, 0.1627, 0.1203],
# [0.5815, 0.8387, 0.7793],
# [0.0707, 0.4137, 0.9285]], requires_grad=True)
print(w)
#tensor([[1., 1., 1.],
# [1., 1., 1.],
# [1., 1., 1.]], requires_grad=True)
yy=torch.mm(x,w)
print(yy)
#tensor([[1.1122, 1.1122, 1.1122],
# [2.1995, 2.1995, 2.1995],
# [1.4129, 1.4129, 1.4129]], grad_fn=<MmBackward>)
detached_yy=yy.detach()   # shares data with yy, but requires_grad=False and no grad_fn
y=torch.mean(yy)
y.backward()
print(yy.grad)
#None
print(w.grad)
#tensor([[0.1752, 0.1752, 0.1752],
# [0.1699, 0.1699, 0.1699],
# [0.1551, 0.1551, 0.1551]])
print(x.grad)
#tensor([[0.3333, 0.3333, 0.3333],
# [0.3333, 0.3333, 0.3333],
# [0.3333, 0.3333, 0.3333]])
Notes:
1. If you use detach() to split off a tensor but do not modify it, backward() is not affected.
import torch
a = torch.tensor([0.8, 0.7, 0.3], requires_grad=True)
print(a.grad)
out = a.sigmoid()
print(out)
# After detach(), c has requires_grad=False
c = out.detach()
print(c)
# Backpropagate from the original (non-detached) output
out.sum().backward()
print(a.grad)
#None
#tensor([0.6900, 0.6682, 0.5744], grad_fn=<SigmoidBackward>)
#tensor([0.6900, 0.6682, 0.5744])
#tensor([0.2139, 0.2217, 0.2445])
2. If you use detach() to split off a tensor and then backpropagate from the detached tensor itself, backward() fails with an error.
import torch
a = torch.tensor([0.8, 0.7, 0.3], requires_grad=True)
print(a.grad)
out = a.sigmoid()
print(out)
#After detach(), c has requires_grad=False
c = out.detach()
print(c)
#Backpropagate from the detached tensor c
c.sum().backward()
print(a.grad)
''' Output
None
tensor([0.6900, 0.6682, 0.5744], grad_fn=<SigmoidBackward>)
tensor([0.6900, 0.6682, 0.5744])
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
'''
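A third situation is worth noting, illustrated here with a small sketch (not from the original notes): the detached tensor shares storage with the original, so modifying it in place also overwrites the original output, and a later backward() through that output raises a RuntimeError because a value needed for the gradient computation has been changed.
import torch
a = torch.tensor([0.8, 0.7, 0.3], requires_grad=True)
out = a.sigmoid()
c = out.detach()
c.zero_()            # in-place change; out's data is overwritten as well
print(out)           # tensor([0., 0., 0.], grad_fn=<SigmoidBackward>)
out.sum().backward() # RuntimeError: a variable needed for gradient computation
                     # has been modified by an inplace operation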
with torch.no_grad()
A code block wrapped in with torch.no_grad(): is executed without gradient tracking, which is useful for evaluation and inference.
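The notes do not include an example for this, so here is a minimal sketch (illustrative only):
import torch
x=torch.ones(2,2,requires_grad=True)
print((x**2).requires_grad)   # True, the operation is tracked as usual
with torch.no_grad():
    y=x**2
print(y.requires_grad)        # False, no graph is built inside the block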
Using torch.autograd.grad in PyTorch to compute gradients
import torch
x = torch.zeros(3, 4)
# fill in the values first, then enable gradient tracking; assigning into a
# leaf tensor that already requires grad would raise an in-place-operation error
for i in range(3):
    for j in range(4):
        x[i][j] = i + j
x.requires_grad_(True)
y = x ** 2
print(x)
#tensor([[0., 1., 2., 3.],
#        [1., 2., 3., 4.],
#        [2., 3., 4., 5.]], requires_grad=True)
print(y)
#tensor([[ 0., 1., 4., 9.],
# [ 1., 4., 9., 16.],
# [ 4., 9., 16., 25.]], grad_fn=<PowBackward0>)
weight=torch.ones(y.size())
print(weight)
dxdy=torch.autograd.grad(outputs=y,
                         inputs=x,
                         grad_outputs=weight,
                         retain_graph=True,
                         create_graph=True,
                         only_inputs=True)
print(dxdy)
print(dxdy[0])
#Why dxdy[0]? torch.autograd.grad returns a tuple; to compute the second derivative
#we differentiate its first element, which is the gradient tensor itself
dy2dx2=torch.autograd.grad(outputs=dxdy[0],
                           inputs=x,
                           grad_outputs=weight,
                           retain_graph=True,
                           create_graph=True,
                           only_inputs=True)
print(dy2dx2[0])
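Since y = x ** 2 and grad_outputs is a tensor of ones, the first call should return 2*x and the second a tensor filled with 2s. The key flag is create_graph=True in the first call: it builds a graph for the gradient itself, which is what allows it to be differentiated a second time.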