1 Importing Only the First Few Layers' Parameters
1.1 Different Network Architectures
This example shows how to transfer only the first layer's parameters (conv1). The code is as follows:
import numpy as np
import torch
from torch.nn import Module
from torch import nn
from torch.nn import functional as F

# The pretrained network, Net1
class Net1(Module):
    def __init__(self, category_size):
        super(Net1, self).__init__()
        self.conv1 = nn.Conv2d(1, 1, 3, 1)
        self.conv2 = nn.Conv2d(1, 1, 1, 1)
        self.fc1 = nn.Linear(1, category_size)
        self._init_parameters()

    def forward(self, x):
        batch_size = x.size(0)
        x = self.conv1(x)
        x = self.conv2(x)
        x = F.avg_pool2d(x, (x.shape[2], x.shape[3]))
        x = x.view(batch_size, -1)
        x = self.fc1(x)
        return x

    def _init_parameters(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

# The new network for transfer learning, Net2; its first layer has the same structure as Net1's
class Net2(Module):
    def __init__(self, category_size):
        super(Net2, self).__init__()
        self.conv1 = nn.Conv2d(1, 1, 3, 1)
        self.conv2 = nn.Conv2d(1, 1, 1, 1)
        self.fc1 = nn.Linear(1, category_size)
        # Initialize all layers first; the transferred first-layer parameters are loaded over them later
        self._init_parameters()

    def forward(self, x):
        batch_size = x.size(0)
        x = self.conv1(x)
        x = self.conv2(x)
        x = F.avg_pool2d(x, (x.shape[2], x.shape[3]))
        # Extra pooling step: the structural difference from Net1
        # (a no-op here, since the feature map is already 1x1)
        x = F.adaptive_avg_pool2d(x, output_size=(1, 1))
        x = x.view(batch_size, -1)
        x = self.fc1(x)
        return x

    def _init_parameters(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

if __name__ == '__main__':
    net = Net1(category_size=2)
    # Print the parameter values
    for name, param in net.named_parameters():
        print(name, '\t\t', param)
    torch.save(net.state_dict(), './save/transfer.pth')
    # ==============A Transfer learning Example===========================
    print("# ==============Transfer===========================")
    net = Net2(category_size=2)
    save_model = torch.load('./save/transfer.pth')
    model_dict = net.state_dict()
    # Keep only the first layer's entries from the checkpoint
    state_dict = {k: v for k, v in save_model.items() if k in set(['conv1.weight', 'conv1.bias'])}
    # The update() call is mandatory; without it the transferred parameters are never loaded
    # (Net2 would keep its freshly initialized values)
    model_dict.update(state_dict)
    net.load_state_dict(model_dict)
    for name, param in net.named_parameters():
        print(name, '\t\t', param)
Output:
conv1.weight Parameter containing:
tensor([[[[-0.6707, -0.2541, -0.2324],
          [-0.2797, 0.1076, 0.4020],
          [-0.3387, 0.2120, -0.1271]]]], requires_grad=True)
conv1.bias Parameter containing:
tensor([0.], requires_grad=True)
conv2.weight Parameter containing:
tensor([[[[-1.4478]]]], requires_grad=True)
conv2.bias Parameter containing:
tensor([0.], requires_grad=True)
fc1.weight Parameter containing:
tensor([[ 0.0200],
        [-0.0027]], requires_grad=True)
fc1.bias Parameter containing:
tensor([0., 0.], requires_grad=True)
# ==============Transfer===========================
conv1.weight Parameter containing:
tensor([[[[-0.6707, -0.2541, -0.2324],
          [-0.2797, 0.1076, 0.4020],
          [-0.3387, 0.2120, -0.1271]]]], requires_grad=True)
conv1.bias Parameter containing:
tensor([0.], requires_grad=True)
conv2.weight Parameter containing:
tensor([[[[0.1384]]]], requires_grad=True)
conv2.bias Parameter containing:
tensor([0.], requires_grad=True)
fc1.weight Parameter containing:
tensor([[0.0035],
        [0.0155]], requires_grad=True)
fc1.bias Parameter containing:
tensor([0., 0.], requires_grad=True)
For further details, see: Pytorch中,只导入部分模型参数的做法.
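Two variants worth knowing (sketches of my own, not from the referenced article; they reuse the save_model, model_dict and net variables from the script above): the transferred keys can be selected by prefix instead of an explicit set, and load_state_dict accepts strict=False, which leaves any parameter of Net2 that is absent from the filtered dict at its initialized value:

# Variant 1: select the transferred layer by key prefix rather than an explicit set
state_dict = {k: v for k, v in save_model.items() if k.startswith('conv1.')}
model_dict.update(state_dict)
net.load_state_dict(model_dict)

# Variant 2: load the filtered dict directly; strict=False tells PyTorch to
# ignore the Net2 parameters that have no counterpart in state_dict
net.load_state_dict(state_dict, strict=False)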
1.2 Identical Network Architectures
The code is similar to that in Section 1.1; here Net1 and Net2 are structurally identical:
import numpy as np
import torch
from torch.nn import Module
from torch import nn
from torch.nn import functional as F

class Net1(Module):
    def __init__(self, category_size):
        super(Net1, self).__init__()
        self.conv1 = nn.Conv2d(1, 1, 3, 1)
        self.conv2 = nn.Conv2d(1, 1, 1, 1)
        self.fc1 = nn.Linear(1, category_size)
        self._init_parameters()

    def forward(self, x):
        batch_size = x.size(0)
        x = self.conv1(x)
        x = self.conv2(x)
        x = F.avg_pool2d(x, (x.shape[2], x.shape[3]))
        x = x.view(batch_size, -1)
        x = self.fc1(x)
        return x

    def _init_parameters(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

# Net2 is structurally identical to Net1 in this version
class Net2(Module):
    def __init__(self, category_size):
        super(Net2, self).__init__()
        self.conv1 = nn.Conv2d(1, 1, 3, 1)
        self.conv2 = nn.Conv2d(1, 1, 1, 1)
        self.fc1 = nn.Linear(1, category_size)
        self._init_parameters()

    def forward(self, x):
        batch_size = x.size(0)
        x = self.conv1(x)
        x = self.conv2(x)
        x = F.avg_pool2d(x, (x.shape[2], x.shape[3]))
        x = x.view(batch_size, -1)
        x = self.fc1(x)
        return x

    def _init_parameters(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

if __name__ == '__main__':
    net = Net1(category_size=2)
    for name, param in net.named_parameters():
        print(name, '\t\t', param)
    torch.save(net.state_dict(), './save/transfer.pth')
    # ==============A Transfer learning Example===========================
    print('# ==============Transfer===========================')
    net = Net2(category_size=2)
    save_model = torch.load('./save/transfer.pth')
    model_dict = net.state_dict()
    state_dict = {k: v for k, v in save_model.items() if k in set(['conv1.weight', 'conv1.bias'])}
    model_dict.update(state_dict)
    net.load_state_dict(model_dict)
    for name, param in net.named_parameters():
        print(name, '\t\t', param)
Output:
conv1.weight Parameter containing:
tensor([[[[ 0.2574, -0.6129, -0.0745],
          [-0.1786, -0.2601, 0.4561],
          [ 0.3343, -0.1011, -1.0915]]]], requires_grad=True)
conv1.bias Parameter containing:
tensor([0.], requires_grad=True)
conv2.weight Parameter containing:
tensor([[[[0.9942]]]], requires_grad=True)
conv2.bias Parameter containing:
tensor([0.], requires_grad=True)
fc1.weight Parameter containing:
tensor([[ 0.0039],
        [-0.0050]], requires_grad=True)
fc1.bias Parameter containing:
tensor([0., 0.], requires_grad=True)
# ==============Transfer===========================
conv1.weight Parameter containing:
tensor([[[[ 0.2574, -0.6129, -0.0745],
          [-0.1786, -0.2601, 0.4561],
          [ 0.3343, -0.1011, -1.0915]]]], requires_grad=True)
conv1.bias Parameter containing:
tensor([0.], requires_grad=True)
conv2.weight Parameter containing:
tensor([[[[-0.8057]]]], requires_grad=True)
conv2.bias Parameter containing:
tensor([0.], requires_grad=True)
fc1.weight Parameter containing:
tensor([[ 0.0099],
        [-0.0056]], requires_grad=True)
fc1.bias Parameter containing:
tensor([0., 0.], requires_grad=True)
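Since Net1 and Net2 are identical here, the key-filtering step is only needed when you want to transfer a subset of the layers; otherwise the whole checkpoint can be loaded in one call. A minimal sketch, reusing the definitions above:

net = Net2(category_size=2)
# Every key in the checkpoint matches a parameter of Net2, so strict loading succeeds
net.load_state_dict(torch.load('./save/transfer.pth'))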
2 Freezing the Transferred Parameters
2.1 Frozen Convolutional Layers Without BatchNormalization
The Zhihu answer Pytorch 如何精确的冻结我想冻结的预训练模型的某一层,有什么命令吗? - 有糖吃可好的回答 - 知乎 is highly recommended; the demonstration code is as follows.
First, create a utility file (util.py):
from collections.abc import Iterable

def set_freeze_by_names(model, layer_names, freeze=True):
    # Accept a single layer name as well as a list of names
    # (a plain string is itself Iterable, so it must be special-cased)
    if not isinstance(layer_names, Iterable) or isinstance(layer_names, str):
        layer_names = [layer_names]
    for name, child in model.named_children():
        if name not in layer_names:
            continue
        for param in child.parameters():
            param.requires_grad = not freeze

def freeze_by_names(model, layer_names):
    set_freeze_by_names(model, layer_names, True)

def unfreeze_by_names(model, layer_names):
    set_freeze_by_names(model, layer_names, False)
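Before wiring these helpers into the full example below, a quick sanity check is to print the requires_grad flags directly (a hypothetical snippet, assuming the Net2 defined in Section 1.1 and the helpers saved as util.py):

import util

net = Net2(category_size=2)
util.freeze_by_names(net, ['conv1', 'conv2'])
for name, param in net.named_parameters():
    print(name, param.requires_grad)   # conv1.*/conv2.* -> False, fc1.* -> True
util.unfreeze_by_names(net, ['conv2'])  # flips conv2.* back to True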
Then write the main code; this time the first two layers' parameters are transferred:
import numpy as np
import torch
from torch.nn import Module
from torch import nn
from torch.nn import functional as F

class Net1(Module):
    def __init__(self, category_size):
        super(Net1, self).__init__()
        self.conv1 = nn.Conv2d(1, 1, 3, 1)
        self.conv2 = nn.Conv2d(1, 1, 1, 1)
        self.fc1 = nn.Linear(1, category_size)
        self._init_parameters()

    def forward(self, x):
        batch_size = x.size(0)
        x = self.conv1(x)
        x = self.conv2(x)
        x = F.avg_pool2d(x, (x.shape[2], x.shape[3]))
        x = x.view(batch_size, -1)
        x = self.fc1(x)
        return x

    def _init_parameters(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

class Net2(Module):
    def __init__(self, category_size):
        super(Net2, self).__init__()
        self.conv1 = nn.Conv2d(1, 1, 3, 1)
        self.conv2 = nn.Conv2d(1, 1, 1, 1)
        self.fc1 = nn.Linear(1, category_size)
        self._init_parameters()

    def forward(self, x):
        batch_size = x.size(0)
        x = self.conv1(x)
        x = self.conv2(x)
        x = F.avg_pool2d(x, (x.shape[2], x.shape[3]))
        x = F.adaptive_avg_pool2d(x, output_size=(1, 1))  # the structural difference from Net1
        x = x.view(batch_size, -1)
        x = self.fc1(x)
        return x

    def _init_parameters(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

if __name__ == '__main__':
    net = Net1(category_size=2)
    for name, param in net.named_parameters():
        print(name, '\t\t', param)
    torch.save(net.state_dict(), './save/transfer.pth')
    # ==============Transfer===========================
    print('# ==============Transfer===========================')
    net = Net2(category_size=2)
    save_model = torch.load('./save/transfer.pth')
    model_dict = net.state_dict()
    # Transfer the first two layers (conv1 and conv2)
    state_dict = {k: v for k, v in save_model.items() if
                  k in set(['conv1.weight', 'conv1.bias', 'conv2.weight', 'conv2.bias'])}
    model_dict.update(state_dict)
    net.load_state_dict(model_dict)
    for name, param in net.named_parameters():
        print(name, '\t\t', param)
    # ==============Freeze===========================
    print('# ==============Freeze===========================')
    import util  # the utility file created above
    util.freeze_by_names(net, ['conv1', 'conv2'])
    for name, param in net.named_parameters():
        print(name, '\t\t', param)
    # ============Train the model==========================
    optimizer = torch.optim.SGD(net.parameters(), lr=0.05)
    loss_func = nn.CrossEntropyLoss()
    batch_size = 1
    x = np.ones((batch_size, 1, 3, 3))
    x = torch.Tensor(x)
    y = [1]  # the target class is 1
    y = torch.Tensor(y).long()
    hat_y = net(x)
    print("===========Results: ", hat_y)
    net.train()
    loss = loss_func(hat_y, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # ==============Freeze After Learning===========================
    print('# ==============Freeze After Learning===========================')
    for name, param in net.named_parameters():
        print(name, '\t\t', param)
    net.eval()
    print("============Results after learning: ", net(x))
Output:
conv1.weight Parameter containing:
tensor([[[[ 1.6932, 0.5651, 0.4273],
          [ 0.4963, -0.7085, 0.2292],
          [-0.1459, 0.1176, 0.2800]]]], requires_grad=True)
conv1.bias Parameter containing:
tensor([0.], requires_grad=True)
conv2.weight Parameter containing:
tensor([[[[0.0634]]]], requires_grad=True)
conv2.bias Parameter containing:
tensor([0.], requires_grad=True)
fc1.weight Parameter containing:
tensor([[ 0.0168],
        [-0.0051]], requires_grad=True)
fc1.bias Parameter containing:
tensor([0., 0.], requires_grad=True)
# ==============Transfer===========================
conv1.weight Parameter containing:
tensor([[[[ 1.6932, 0.5651, 0.4273],
          [ 0.4963, -0.7085, 0.2292],
          [-0.1459, 0.1176, 0.2800]]]], requires_grad=True)
conv1.bias Parameter containing:
tensor([0.], requires_grad=True)
conv2.weight Parameter containing:
tensor([[[[0.0634]]]], requires_grad=True)
conv2.bias Parameter containing:
tensor([0.], requires_grad=True)
fc1.weight Parameter containing:
tensor([[0.0211],
        [0.0058]], requires_grad=True)
fc1.bias Parameter containing:
tensor([0., 0.], requires_grad=True)
# ==============Freeze===========================
conv1.weight Parameter containing:
tensor([[[[ 1.6932, 0.5651, 0.4273],
          [ 0.4963, -0.7085, 0.2292],
          [-0.1459, 0.1176, 0.2800]]]])
conv1.bias Parameter containing:
tensor([0.])
conv2.weight Parameter containing:
tensor([[[[0.0634]]]])
conv2.bias Parameter containing:
tensor([0.])
fc1.weight Parameter containing:
tensor([[0.0211],
        [0.0058]], requires_grad=True)
fc1.bias Parameter containing:
tensor([0., 0.], requires_grad=True)
===========Results: tensor([[0.0040, 0.0011]], grad_fn=<AddmmBackward>)
# ==============Freeze After Learning===========================
conv1.weight Parameter containing:
tensor([[[[ 1.6932, 0.5651, 0.4273],
          [ 0.4963, -0.7085, 0.2292],
          [-0.1459, 0.1176, 0.2800]]]])
conv1.bias Parameter containing:
tensor([0.])
conv2.weight Parameter containing:
tensor([[[[0.0634]]]])
conv2.bias Parameter containing:
tensor([0.])
fc1.weight Parameter containing:
tensor([[0.0164],
        [0.0105]], requires_grad=True)
fc1.bias Parameter containing:
tensor([-0.0250, 0.0250], requires_grad=True)
============Results after learning: tensor([[-0.0220, 0.0270]], grad_fn=<AddmmBackward>)
As the output shows, the parameters of the first two layers did not change during training: the model initially leaned toward class 0 and, after training, was corrected to favor class 1.
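One caveat: the example hands all of net.parameters() to SGD. Parameters whose .grad is None are skipped by the optimizer, so plain SGD leaves the frozen layers untouched; but if a gradient buffer from earlier updates still exists (e.g. zeroed rather than cleared), options such as momentum or weight_decay can still nudge them. A common safeguard (a sketch, not part of the original example) is to hand the optimizer only the trainable parameters:

optimizer = torch.optim.SGD(
    filter(lambda p: p.requires_grad, net.parameters()),  # exclude the frozen conv1/conv2
    lr=0.05)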
2.2 Frozen Convolutional Layers With BatchNormalization
Note: if the frozen CNN layers contain BN layers, extra steps are needed. Even after the BN layers' learnable weight and bias are frozen, the running mean and running variance still keep updating during training. This puzzled me for quite a while: I originally trained a CNN-1, froze its first few layers and transferred them into a CNN-2 for further training, and finally wrote a CNN-3 that outputs the results of both CNN-1 and CNN-2, only to find that CNN-3's output did not match CNN-2's.
For a solution, see this article: pytorch固定BN层参数.
Here is an example (overriding the parent class's train and eval methods):
from torch import nn

class Demo(nn.Module):
    def __init__(self, categories_size):
        super(Demo, self).__init__()  # 3 * 224 * 224
        ...

    def train(self, mode=True):
        super().train(mode)
        # Force the frozen BN layer back into eval mode so that its
        # running_mean / running_var stop updating during training
        self.bn1.eval()

    def eval(self):
        # nn.Module.eval() simply calls self.train(False), which already
        # triggers the override above; this just makes the intent explicit
        super().eval()
During training, remember to call net.train(); thanks to the override above, this keeps the BN layer disabled (in eval mode).
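For models with many BN layers, naming each one inside train() gets tedious. A minimal sketch of a more generic variant of the same idea (the helper name and the net.features submodule are hypothetical):

from torch import nn

def freeze_bn(module):
    # Walk the module tree and put every BN layer into eval mode,
    # which stops running_mean / running_var from being updated
    for m in module.modules():
        if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
            m.eval()
            if m.affine:
                # optionally freeze the learnable affine parameters as well
                m.weight.requires_grad = False
                m.bias.requires_grad = False

# net.train() puts all BN layers back into training mode, so re-apply after each call
net.train()
freeze_bn(net.features)  # `net.features` is a hypothetical submodule holding the frozen layers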
Reference
Pytorch中,只导入部分模型参数的做法
pytorch 打印网络参数
Pytorch 如何精确的冻结我想冻结的预训练模型的某一层,有什么命令吗? - 有糖吃可好的回答 - 知乎
pytorch固定BN层参数