PyTorch的参数固定以及detach clone

detach

detach()后的tensor与原tensor共享数据内存,当原始tensor在计算图中数值发生反向传播等更新之后,detach()的tensor值也发生了改变

import torch
from torch import optim
from torch.nn import Parameter

# Demonstration: detach() returns a view that SHARES storage with the source
# tensor. After an optimizer step updates the source leaf tensors, the
# detached views show the updated values as well.
x = torch.tensor(1.)
a = torch.tensor(1., requires_grad=True)
b = Parameter(torch.tensor(2.))

y = a ** 2 * x + b * x
z = y ** 2 + 2 * y

optimizer = optim.SGD([a, b], lr=0.01)

# Detached views: no grad tracking, but same underlying memory as a / b.
ta = a.detach()
tb = b.detach()

print('before:', a, b, ta, tb)
print()

optimizer.zero_grad()
z.backward()
optimizer.step()

# ta / tb changed too, because they alias the storage of a / b.
print('after:', a, b, ta, tb)

# before: tensor(1., requires_grad=True) Parameter containing:
# tensor(2., requires_grad=True) tensor(1.) tensor(2.)
#
# after: tensor(0.8400, requires_grad=True) Parameter containing:
# tensor(1.9200, requires_grad=True) tensor(0.8400) tensor(1.9200)

clone

clone使用了新的内存,当原始tensor在计算图中数值发生反向传播等更新之后,clone()的tensor值不会发生变化

如果新的tensor从 tensor(requires_grad=True) 或者 Parameter 克隆而来,则其 grad_fn=&lt;CloneBackward&gt;,即表示其仍可以作为中间节点传播梯度,相当于一次恒等映射。

import torch
from torch import optim
from torch.nn import Parameter

# Demonstration: clone() copies the data into NEW memory, so an optimizer
# step that updates the source tensors does NOT change the cloned tensors.
x = torch.tensor(1.)
a = torch.tensor(1., requires_grad=True)
b = Parameter(torch.tensor(2.))

y = a**2 * x + b * x

z = y**2 + 2*y

optimizer = optim.SGD([a, b], lr=0.01)

# Clones of leaf tensors carry grad_fn=<CloneBackward> (an identity node in
# the autograd graph) but own their storage, unlike detach().
ta = a.clone()
tb = b.clone()

print('before:', a, b, ta, tb)
print()

optimizer.zero_grad()
z.backward()
optimizer.step()

# Bug fix: this snapshot is taken AFTER the update, so label it 'after:'
# (the original printed 'before:' twice by copy-paste mistake).
print('after:', a, b, ta, tb)

# before: tensor(1., requires_grad=True) Parameter containing:
# tensor(2., requires_grad=True) tensor(1., grad_fn=<CloneBackward>) tensor(2., grad_fn=<CloneBackward>)
#
# after: tensor(0.8400, requires_grad=True) Parameter containing:
# tensor(1.9200, requires_grad=True) tensor(1., grad_fn=<CloneBackward>) tensor(2., grad_fn=<CloneBackward>)

import torch
from torch import optim
from torch.nn import Parameter

# Demonstration: cloning an intermediate node (z -> z2) acts as an identity
# mapping in the graph, so gradients still flow from z2 back to a and b.
x = torch.tensor(1.)
a = torch.tensor(1., requires_grad=True)
b = Parameter(torch.tensor(2.))

y = a**2 * x + b * x

z = y**2 + 2*y

# z -> z2 is an identity mapping; gradients propagate through the clone.
z2 = z.clone()

optimizer = optim.SGD([a, b], lr=0.01)
print('before:', a, b)
print()

optimizer.zero_grad()
z2.backward()
optimizer.step()

# Bug fix: this snapshot is taken AFTER the update, so label it 'after:'
# (the original printed 'before:' twice by copy-paste mistake).
print('after:', a, b)

# before: tensor(1., requires_grad=True) Parameter containing:
# tensor(2., requires_grad=True)
#
# after: tensor(0.8400, requires_grad=True) Parameter containing:
# tensor(1.9200, requires_grad=True)

lambda

from functools import reduce

foo = [2, 18, 9, 22, 17, 24, 8, 12, 27]

# In a lambda, the comma-separated parameters precede the colon and the
# single expression after the colon is the return value.
# With filter: keep only the multiples of 3.
print(filter(lambda v: not v % 3, foo))
print(list(filter(lambda v: not v % 3, foo)))
print()

# With map: transform every element.
print(map(lambda v: 2 * v + 10, foo))
print(list(map(lambda v: 2 * v + 10, foo)))
print()

# With reduce: running accumulation — first 2 + 18 = 20, then 20 + 9 = 29, ...
print(reduce(lambda acc, v: acc + v, foo))

# <filter object at 0x000002206C252A88>
# [18, 9, 24, 12, 27]
#
# <map object at 0x000002206C1FF608>
# [14, 46, 28, 54, 44, 58, 26, 34, 64]
#
# 139

detach固定部分参数

缺点:只能固定detach之前的所有参数

# 在Net.forward里删除out = out.detach(),结果为:
# layer1.weight False
# layer1.bias False
# layer2.weight False
# layer2.bias False
# 即所有参数发生了变化(优化)

# 在Net.forward里添加out = out.detach(),结果为:
# layer1.weight True
# layer1.bias True
# layer2.weight False
# layer2.bias False
# 即self.layer1里的参数没有发生变化,这是因为out = out.detach()返回的tensor不能传播梯度,
# 所以在反向传播至该tensor时,不能再向前传播,所以其之前的参数将被锁定

import torch
from torch import nn
from torch.nn import functional as F
from torch import optim


class Net(nn.Module):
    """Two-layer MLP whose first layer is cut out of the autograd graph.

    The detach() call in forward() stops gradients from flowing back into
    layer1, so only layer2's parameters receive gradients during training.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.layer1 = nn.Linear(10, 5)
        self.layer2 = nn.Linear(5, 3)

    def forward(self, x):
        # Detaching here freezes everything upstream (layer1's parameters).
        hidden = self.layer1(x).detach()
        return F.relu(self.layer2(hidden))


net = Net()
optimizer = optim.SGD(net.parameters(), lr=0.01)

x = torch.randn(8, 10)

# Snapshot every parameter before training so we can compare afterwards.
initial = {name: p.detach().clone() for name, p in net.named_parameters()}

for _ in range(100):
    prediction = net(x)
    loss = F.mse_loss(prediction, torch.zeros(8, 3))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# True  -> the parameter kept its initial value (frozen by the detach())
# False -> the parameter was updated by the optimizer
for name, p in net.named_parameters():
    print(f"{name} {torch.equal(p, initial[name])}")

requires_grad = False固定部分参数

# 通过self.layer1.weight.requires_grad = False,只固定该参数

# loss的requires_grad=False则其不能调用backward;某non-leaf Tensor的requires_grad=False则其之前的参数由于梯度反向传播时被截断,所以不会得到更新;leaf Tensor的requires_grad=False,表示其不需要梯度,所以其也不能被更新。

import torch
from torch import nn
from torch.nn import functional as F
from torch import optim


class Net(nn.Module):
    """Two-layer MLP with layer1's weight frozen via requires_grad=False.

    Only this single leaf tensor is frozen; layer1.bias and all of layer2
    still receive gradients and are updated by the optimizer.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.layer1 = nn.Linear(10, 5)
        # Freeze just the weight tensor of the first layer.
        self.layer1.weight.requires_grad = False
        self.layer2 = nn.Linear(5, 3)

    def forward(self, x):
        hidden = self.layer1(x)
        return F.relu(self.layer2(hidden))


net = Net()
optimizer = optim.SGD(net.parameters(), lr=0.01)

x = torch.randn(8, 10)

# Snapshot every parameter before training so we can compare afterwards.
initial = {name: p.detach().clone() for name, p in net.named_parameters()}

for _ in range(100):
    prediction = net(x)
    loss = F.mse_loss(prediction, torch.zeros(8, 3))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# True  -> the parameter kept its initial value (requires_grad=False)
# False -> the parameter was updated by the optimizer
for name, p in net.named_parameters():
    print(f"{name} {torch.equal(p, initial[name])}")

# layer1.weight True
# layer1.bias False
# layer2.weight False
# layer2.bias False
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

MallocLu

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值