在应用中,有时候需要遍历模型的各个层,查看其梯度情况等,或者是加载模型的参数字典后要检查是否加载正确。常用的有如下几种方法:
构造一个简单的模型:
import torch
import torch.nn as nn
class test(nn.Module):
    """Minimal two-layer MLP (3 -> 4 -> 3) used to demonstrate the
    different ways of inspecting a model's parameters."""

    def __init__(self):
        super(test, self).__init__()
        self.layer1 = nn.Linear(3, 4)  # in_features=3, out_features=4
        self.layer2 = nn.Linear(4, 3)  # in_features=4, out_features=3

    def forward(self, x):
        # Added: the original defined no forward(), so calling model(x)
        # would raise NotImplementedError. Chaining the two layers is the
        # obvious composition and keeps the class backward-compatible.
        return self.layer2(self.layer1(x))


model = test()
1、named_parameters()
# named_parameters() yields (name, Parameter) tuples, so every printed
# item carries the layer/parameter name alongside the values.
for name_and_param in model.named_parameters():
    print(name_and_param)
    print('----------------')
结果如下:
('layer1.weight', Parameter containing:
tensor([[ 0.4006, 0.0323, 0.1754],
[-0.2437, 0.4170, -0.0618],
[-0.2427, -0.2061, 0.3739],
[ 0.3389, -0.0788, -0.2026]], requires_grad=True))
----------------
('layer1.bias', Parameter containing:
tensor([ 0.1224, -0.1801, 0.5462, 0.2995], requires_grad=True))
----------------
('layer2.weight', Parameter containing:
tensor([[-0.4135, -0.3223, 0.4771, -0.2044],
[ 0.0495, 0.0935, -0.4844, -0.3231],
[ 0.2642, 0.2389, -0.0652, -0.1688]], requires_grad=True))
----------------
('layer2.bias', Parameter containing:
tensor([0.4030, 0.0506, 0.1843], requires_grad=True))
----------------
2、parameters()
# parameters() yields only the Parameter tensors, without their names.
for weight in model.parameters():
    print(weight)
    print('----------------')
Parameter containing:
tensor([[ 0.4006, 0.0323, 0.1754],
[-0.2437, 0.4170, -0.0618],
[-0.2427, -0.2061, 0.3739],
[ 0.3389, -0.0788, -0.2026]], requires_grad=True)
----------------
Parameter containing:
tensor([ 0.1224, -0.1801, 0.5462, 0.2995], requires_grad=True)
----------------
Parameter containing:
tensor([[-0.4135, -0.3223, 0.4771, -0.2044],
[ 0.0495, 0.0935, -0.4844, -0.3231],
[ 0.2642, 0.2389, -0.0652, -0.1688]], requires_grad=True)
----------------
Parameter containing:
tensor([0.4030, 0.0506, 0.1843], requires_grad=True)
----------------
可见,与上面的 named_parameters() 方法相比,parameters() 只返回参数张量本身,不带各层的名字。
3、state_dict()
# state_dict() builds a brand-new OrderedDict on every call; the original
# re-called it inside the loop for each key. Call it once up front and
# iterate the (name, tensor) pairs directly — output is unchanged.
state = model.state_dict()
for name, tensor in state.items():
    print(name)    # e.g. 'layer1.weight'
    print(tensor)  # plain tensor, no "Parameter containing:" wrapper
    print('----------------')
layer1.weight
tensor([[ 0.4006, 0.0323, 0.1754],
[-0.2437, 0.4170, -0.0618],
[-0.2427, -0.2061, 0.3739],
[ 0.3389, -0.0788, -0.2026]])
----------------
layer1.bias
tensor([ 0.1224, -0.1801, 0.5462, 0.2995])
----------------
layer2.weight
tensor([[-0.4135, -0.3223, 0.4771, -0.2044],
[ 0.0495, 0.0935, -0.4844, -0.3231],
[ 0.2642, 0.2389, -0.0652, -0.1688]])
----------------
layer2.bias
tensor([0.4030, 0.0506, 0.1843])
----------------
上面几个 for 循环中的 _ 是 enumerate 产生的编号(索引);由于这里并不需要它,按惯例用下划线占位表示忽略。
应用:
在自监督学习中,经常会采用一种叫做 linear probing(线性探测,简称 linprobe)
的评价方法:在微调(finetune)阶段固定住分类头之前的所有参数,只对分类头的参数进行训练。因此可以采用下面的代码把所有非分类头的参数设置为不需要梯度,这样在训练阶段就能固定住这些参数了。
# Freeze all but the head: first disable gradients everywhere, then
# re-enable them only for the classification head's parameters.
# parameters() yields the same Parameter objects as named_parameters(),
# so discarding the names changes nothing.
for param in model.parameters():
    param.requires_grad = False
for param in model.head.parameters():
    param.requires_grad = True