GRL梯度反转层
1.GRL的定义和使用
在前向传播的时候,运算结果不变化,在梯度反向传播的时候,传递给前面节点的梯度变为原来的相反方向。
import torch

# Two leaf tensors; both will accumulate gradients on backward().
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = torch.tensor([4.0, 5.0, 6.0], requires_grad=True)
# z = x^2 + y^2 -> [17, 29, 45]
z = x ** 2 + y ** 2
# f = z + x + y -> [22, 36, 54]
f = z + x + y
class _GradientScalarLayer(torch.autograd.Function):
    """Identity in the forward pass; scales the gradient by `weight` in backward.

    With weight = -1 this is the classic Gradient Reversal Layer (GRL).
    """

    @staticmethod
    def forward(ctx, input, weight):
        # ctx is a context object used to stash state for the backward pass.
        ctx.weight = weight
        return input.view_as(input)

    @staticmethod
    def backward(ctx, grad_output):
        # Scale (reverse, when weight < 0) the incoming gradient.
        # `weight` is not a tensor input, so it receives no gradient (None).
        return grad_output.clone() * ctx.weight, None


gradient_scalar = _GradientScalarLayer.apply
class GradientScalarLayer(torch.nn.Module):
    """nn.Module wrapper around `gradient_scalar`: identity forward,
    gradient multiplied by `weight` on the way back."""

    def __init__(self, weight):
        super().__init__()
        self.weight = weight

    def forward(self, input):
        return gradient_scalar(input, self.weight)

    def __repr__(self):
        # e.g. "GradientScalarLayer(weight=-1)"
        return f"{self.__class__.__name__}(weight={self.weight})"
s = 6 * f.sum() # f.sum() = 22 + 36 + 54 = 112; s = 6 * 112 = 672
Grl = GradientScalarLayer(-1)
# Identity in forward, so the printed value is unchanged.
s = Grl(s)
print(s) # tensor(672., grad_fn=<_GradientScalarLayerBackward>)
s.backward()
print(x) # tensor([1., 2., 3.], requires_grad=True)
# ds/dx = -6 * df/dx = -6 * (2x + 1) = [-18, -30, -42]: the GRL flips the gradient's sign.
print(x.grad) # tensor([-18., -30., -42.])
结果:
tensor(672., grad_fn=<_GradientScalarLayerBackward>)
tensor([1., 2., 3.], requires_grad=True)
tensor([-18., -30., -42.])
2.计算非叶子节点梯度(None)
import torch

# Leaf inputs for this example.
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = torch.tensor([4.0, 5.0, 6.0], requires_grad=True)
z = x.pow(2) + y.pow(2)  # [17, 29, 45]
f = x + y + z            # [22, 36, 54]
class _GradientScalarLayer(torch.autograd.Function):
    """Pass the input through unchanged; multiply the incoming
    gradient by `weight` during backpropagation."""

    @staticmethod
    def forward(ctx, input, weight):
        # Stash the scale factor on the context for the backward pass.
        ctx.weight = weight
        return input.view_as(input)

    @staticmethod
    def backward(ctx, grad_output):
        scaled = ctx.weight * grad_output.clone()
        # Second input (`weight`) is a plain number, so no gradient for it.
        return scaled, None


gradient_scalar = _GradientScalarLayer.apply
class GradientScalarLayer(torch.nn.Module):
    """Module wrapper for the gradient-scaling autograd function."""

    def __init__(self, weight):
        super().__init__()
        # Factor applied to the gradient on the way back (-1 reverses it).
        self.weight = weight

    def forward(self, input):
        return gradient_scalar(input, self.weight)

    # Human-readable representation showing the configured weight.
    def __repr__(self):
        return "{}(weight={})".format(self.__class__.__name__, self.weight)
Grl = GradientScalarLayer(-1)
# Pass f through the gradient reversal layer (identity in forward).
s = Grl(f)
s1 = 6 * s.sum() # s.sum() = 22 + 36 + 54 = 112; s1 = 6 * 112 = 672
print(s1) # tensor(672., grad_fn=<MulBackward0>)
s1.backward()
print(x) # tensor([1., 2., 3.], requires_grad=True)
print(f) # tensor([22., 36., 54.], grad_fn=<AddBackward0>)
print(s.grad) # None — s is not a leaf, so its gradient is not retained
print(f.grad) # None — f is not a leaf either
# Check which tensors are leaves of the autograd graph.
print("x is leaf:", x.is_leaf) # True
print("y is leaf:", y.is_leaf) # True
print("f is leaf:", f.is_leaf) # False
print("s is leaf:", s.is_leaf) # False
结果:
tensor(672., grad_fn=<MulBackward0>)
tensor([1., 2., 3.], requires_grad=True)
tensor([22., 36., 54.], grad_fn=<AddBackward0>)
None
None
torch的backward在默认情况下仅累积叶子节点张量的梯度。因此f.grad和s.grad没有值:f和s都是运算产生的非叶子节点张量,只有x、y是叶子节点。
# Check which tensors are leaves of the autograd graph.
print("x is leaf:", x.is_leaf) # True: created directly with requires_grad=True
print("y is leaf:", y.is_leaf) # True
print("f is leaf:", f.is_leaf) # False: produced by an operation
print("s is leaf:", s.is_leaf) # False: output of the GRL
结果:
x is leaf: True
y is leaf: True
f is leaf: False
s is leaf: False
3.计算非叶子节点梯度(retain_grad)
要累积非叶子节点的梯度,可以对该张量调用retain_grad方法,如下:
import torch
from torch.autograd import Function

# Leaf tensors.
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = torch.tensor([4.0, 5.0, 6.0], requires_grad=True)
z = x.pow(2) + y.pow(2)
z = z  # z = [17, 29, 45]
f = z + x + y  # f = [22, 36, 54]
# f is NOT a leaf; retain_grad() makes autograd keep f.grad after backward().
f.retain_grad()
class _GradientScalarLayer(torch.autograd.Function):
    """Forward is the identity; backward scales the gradient by `weight`."""

    @staticmethod
    def forward(ctx, input, weight):
        ctx.weight = weight  # saved on the context for backward
        return input.view_as(input)

    @staticmethod
    def backward(ctx, grad_output):
        # Clone so the upstream gradient buffer is never modified in place.
        grad_input = grad_output.clone()
        grad_input = grad_input * ctx.weight
        # No gradient flows to the non-tensor `weight` argument.
        return grad_input, None


gradient_scalar = _GradientScalarLayer.apply
class GradientScalarLayer(torch.nn.Module):
    """Gradient reversal/scaling layer: y = x in forward,
    dL/dx = weight * dL/dy in backward."""

    def __init__(self, weight):
        super().__init__()
        self.weight = weight

    def forward(self, input):
        return gradient_scalar(input, self.weight)

    # Display the layer with its weight, e.g. "GradientScalarLayer(weight=-1)".
    def __repr__(self):
        pieces = [self.__class__.__name__, "(", "weight=", str(self.weight), ")"]
        return "".join(pieces)
Grl = GradientScalarLayer(-1)
# Pass f through the gradient reversal layer.
s = Grl(f)
s.retain_grad() # s is a non-leaf tensor; retain_grad() keeps its gradient after backward()
s1 = 6 * s.sum() # s.sum() = 22 + 36 + 54 = 112; s1 = 6 * 112 = 672
print(s1) # tensor(672., grad_fn=<MulBackward0>)
s1.backward()
print(x) # tensor([1., 2., 3.], requires_grad=True)
print(f) # tensor([22., 36., 54.], grad_fn=<AddBackward0>)
print(s.grad) # tensor([6., 6., 6.])
print(f.grad) # tensor([-6., -6., -6.]) — sign flipped by the GRL
结果:
tensor(672., grad_fn=<MulBackward0>)
tensor([1., 2., 3.], requires_grad=True)
tensor([22., 36., 54.], grad_fn=<AddBackward0>)
tensor([6., 6., 6.])
tensor([-6., -6., -6.])
4.训练梯度反转层
1.验证分类器有效:在调试GRL前,先单独对分类器进行训练,确保网络能够进行分类;
2.验证GRL是否工作:在输出结果层(最后一层)后面加上GRL,令整个梯度都是反向的,并且将GRL参数设置为常数-1,观察训练loss是否越来越大;
3.以上功能确保正常后,即可将GRL加到网络进行训练。理想的网络分类器损失是先下降后增加的趋势,最后网络无法对其分类。