lesson - 05 - 01 - autograd
autograd–自动求导系统
import torch as tt
# Fix the RNG seed so the randn()-based examples below are reproducible.
tt.manual_seed(10)
<torch._C.Generator at 0x6079f10>
torch.autograd.backward(tensors, # 用于求导的张量, 如loss
grad_tensors = None, # 多梯度权重, 当tensors不是标量时为各输出指定的梯度权重
retain_graph = None, # 保存计算图, 以便再次调用backward
create_graph = False) # 创建导数计算图, 用于高阶求导
-
参数tensors如果是标量,函数backward计算参数tensors对于给定图叶子节点的梯度( graph leaves,即为设置requires_grad=True的变量)。
-
参数tensors如果不是标量,需要另外指定参数grad_tensors,参数grad_tensors必须和参数tensors的长度相同。在这一种情况下,backward实际上实现的是代价函数(loss=tensors.dot(grad_tensors),内积)关于叶子节点的梯度计算,而不是参数tensors对于给定图叶子节点的梯度。如果指定参数grad_tensors=torch.ones(tensors.size()),显而易见,代价函数关于叶子节点的梯度,也就等于参数tensors对于给定图叶子节点的梯度。
# retain_graph
# a = w + x ; b = w + 1 ; y = a*b = (x + w) * (w + 1) = w^2 + wx ; w = 1 ; x = 2
# retain_graph demo: y = (w + x) * (w + 1) with w = 1, x = 2.
w = tt.tensor([1.], requires_grad=True)
x = tt.tensor([2.], requires_grad=True)
a = w + x        # a = w + x
b = w + 1        # b = w + 1
y = a * b        # y = (w + x) * (w + 1)
# Keep the graph alive so backward() can run a second time below.
y.backward(retain_graph=True)
print(w.grad)    # dy/dw = 2w + x + 1 = 5
y.backward()     # second pass: gradients accumulate, w.grad becomes 10
tensor([5.])
# grad_tensors 设置多个梯度之间的权重
# grad_tensors: per-output weights applied before gradients are accumulated.
w = tt.tensor([1.], requires_grad=True)
x = tt.tensor([2.], requires_grad=True)
a = w + x                        # a = w + x   (call a.retain_grad() to keep its grad)
b = w + 1                        # b = w + 1
y0 = a * b                       # y0 = (w + x) * (w + 1), dy0/dw = 5 at (w=1, x=2)
y1 = a + b                       # y1 = (w + x) + (w + 1), dy1/dw = 2
# torch.cat joins along an existing dim: dim=0 stacks vertically, dim=1 horizontally.
loss = tt.cat([y0, y1], dim = 0)  # [y0, y1]
grad_tensors = tt.tensor([2., 1.])
# `gradient` is forwarded to torch.autograd.backward() as grad_tensors;
# it weights each output: dy0/dw(w=1,x=2)*2 + dy1/dw(w=1,x=2)*1 = 5*2 + 2*1 = 12
loss.backward(gradient = grad_tensors)
print(w.grad)
tensor([12.])
# 关于grad的另一个例子
# Another grad example: loss = (x + y) . z with three random leaf tensors.
x = tt.randn(1, dtype=tt.float32, requires_grad=True)
y = tt.randn(1, dtype=tt.float32, requires_grad=True)
z = tt.randn(1, dtype=tt.float32, requires_grad=True)
t = x + y
loss = t.dot(z)  # inner product
loss.backward(retain_graph=True)
print(x,'\n', y,'\n', z,'\n')
print(z,x.grad,y.grad,'\n')  # expected identical: dloss/dx = dloss/dy = z
print(t,z.grad,'\n')         # expected identical: dloss/dz = t
print(t.grad)  # x, y, z are leaves; t is not, so its .grad stays None
tensor([1.1083], requires_grad=True)
tensor([0.5820], requires_grad=True)
tensor([0.6164], requires_grad=True)
tensor([0.6164], requires_grad=True) tensor([0.6164]) tensor([0.6164])
tensor([1.6904], grad_fn=<AddBackward0>) tensor([1.6904])
None
# autograd.grad
# torch.autograd.grad: functional interface; create_graph=True builds a graph
# for the gradient itself, enabling the second-order derivative below.
x = tt.tensor([3.], requires_grad = True)
y = x ** 2                                         # y = x^2
grad_1 = tt.autograd.grad(y, x, create_graph = True)
print(grad_1)                                      # dy/dx = 2x = 2*3 = 6
grad_2 = tt.autograd.grad(grad_1[0], x)
print(grad_2)                                      # d(2x)/dx = 2
(tensor([6.], grad_fn=<MulBackward0>),)
(tensor([2.]),)
# tips:1 梯度不会自动清零
# Tip 1: gradients accumulate across backward() calls — clear them by hand.
w = tt.tensor([1.], requires_grad=True)
x = tt.tensor([3.], requires_grad=True)
for _ in range(4):
    y = (w + x) * (w + 1)  # y = w*w + x*w + x + w
    y.backward()
    print(w.grad)          # dy/dw = 2*w + x + 1 = 6 every pass, thanks to zero_()
    w.grad.zero_()         # reset the accumulated gradient in place
tensor([6.])
tensor([6.])
tensor([6.])
tensor([6.])
# tips 2 依赖于叶子结点的节点,
# Tip 2: any node computed from a leaf with requires_grad=True
# automatically requires grad itself.
w = tt.tensor([1.], requires_grad=True)
x = tt.tensor([2.], requires_grad=True)
a = w + x
b = w + 1
y = a * b
print(a.requires_grad, b.requires_grad, y.requires_grad)  # True True True
True True True
# tips 3-1
# Tip 3-1: `a += ...` mutates the tensor in place, so id(a) is unchanged,
# whereas `a = a + ...` would bind a brand-new tensor (different id).
a = tt.ones((1, ))
print(id(a), a)
a += tt.ones((1, ))
print(id(a), a)
160732576 tensor([1.])
160732576 tensor([2.])
# tips 3-2 叶子结点不可以进行in_place操作
# Tip 3-2: tensors saved for backward must not be modified in place.
# autograd version-checks them; this block deliberately raises RuntimeError.
w = tt.tensor([1.], requires_grad=True)
x = tt.tensor([2.], requires_grad=True)
a = w + x
b = w + 1
y = a * b      # mul saves a and b for the backward pass
a.add_(1)      # bumps a's version counter after it was saved
y.backward()   # RuntimeError: variable modified by an inplace operation
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-34-67fcd7b7c039> in <module>
9 a.add_(1)
10
---> 11 y.backward()
~\Anaconda3\lib\site-packages\torch\tensor.py in backward(self, gradient, retain_graph, create_graph)
148 products. Defaults to ``False``.
149 """
--> 150 torch.autograd.backward(self, gradient, retain_graph, create_graph)
151
152 def register_hook(self, hook):
~\Anaconda3\lib\site-packages\torch\autograd\__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
97 Variable._execution_engine.run_backward(
98 tensors, grad_tensors, retain_graph, create_graph,
---> 99 allow_unreachable=True) # allow_unreachable flag
100
101
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [1]], which is output 0 of AddBackward0, is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).