A Simple PyTorch Transfer Learning Example

1 Loading Only the First Few Layers' Parameters

1.1 Different Network Architectures

This example shows how to transfer only the first layer's parameters. The code is as follows:

import os

import torch
from torch.nn import Module
from torch import nn
from torch.nn import functional as F

# Net1: the pretrained network
class Net1(Module):
    def __init__(self, category_size):
        super(Net1, self).__init__()
        self.conv1 = nn.Conv2d(1, 1, 3, 1)
        self.conv2 = nn.Conv2d(1, 1, 1, 1)
        self.fc1 = nn.Linear(1, category_size)

        self._init_parameters()

    def forward(self, x):
        batch_size = x.size(0)
        x = self.conv1(x)
        x = self.conv2(x)
        x = F.avg_pool2d(x, (x.shape[2], x.shape[3]))
        x = x.view(batch_size, -1)
        x = self.fc1(x)
        return x

    def _init_parameters(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


# Net2: the new network for transfer learning; its first layer has the same structure as Net1's
class Net2(Module):
    def __init__(self, category_size):
        super(Net2, self).__init__()
        self.conv1 = nn.Conv2d(1, 1, 3, 1)
        self.conv2 = nn.Conv2d(1, 1, 1, 1)
        self.fc1 = nn.Linear(1, category_size)

        # Initialize every layer's parameters first; the first layer's values
        # are then overwritten by the transferred parameters below
        self._init_parameters()

    def forward(self, x):
        batch_size = x.size(0)
        x = self.conv1(x)
        x = self.conv2(x)
        x = F.avg_pool2d(x, (x.shape[2], x.shape[3]))
        x = F.adaptive_avg_pool2d(x, output_size=(1, 1))  # extra op that makes Net2's structure differ from Net1's
        x = x.view(batch_size, -1)
        x = self.fc1(x)
        return x

    def _init_parameters(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


if __name__ == '__main__':
    net = Net1(category_size=2)

    # print the parameter values
    for name, param in net.named_parameters():
        print(name, '\t\t', param)

    os.makedirs('./save', exist_ok=True)  # make sure the save directory exists
    torch.save(net.state_dict(), './save/transfer.pth')

    # ==============A Transfer learning Example===========================
    print("# ==============Transfer===========================")
    net = Net2(category_size=2)

    save_model = torch.load('./save/transfer.pth')
    model_dict = net.state_dict()
    state_dict = {k: v for k, v in save_model.items() if k in {'conv1.weight', 'conv1.bias'}}
    # update() is required: without it the transferred parameters are never loaded
    # (i.e., Net2 would simply keep its freshly initialized values)
    model_dict.update(state_dict)
    net.load_state_dict(model_dict)

    for name, param in net.named_parameters():
        print(name, '\t\t', param)

Output:

conv1.weight 		 Parameter containing:
tensor([[[[-0.6707, -0.2541, -0.2324],
          [-0.2797,  0.1076,  0.4020],
          [-0.3387,  0.2120, -0.1271]]]], requires_grad=True)
conv1.bias 		 Parameter containing:
tensor([0.], requires_grad=True)
conv2.weight 		 Parameter containing:
tensor([[[[-1.4478]]]], requires_grad=True)
conv2.bias 		 Parameter containing:
tensor([0.], requires_grad=True)
fc1.weight 		 Parameter containing:
tensor([[ 0.0200],
        [-0.0027]], requires_grad=True)
fc1.bias 		 Parameter containing:
tensor([0., 0.], requires_grad=True)
# ==============Transfer===========================
conv1.weight 		 Parameter containing:
tensor([[[[-0.6707, -0.2541, -0.2324],
          [-0.2797,  0.1076,  0.4020],
          [-0.3387,  0.2120, -0.1271]]]], requires_grad=True)
conv1.bias 		 Parameter containing:
tensor([0.], requires_grad=True)
conv2.weight 		 Parameter containing:
tensor([[[[0.1384]]]], requires_grad=True)
conv2.bias 		 Parameter containing:
tensor([0.], requires_grad=True)
fc1.weight 		 Parameter containing:
tensor([[0.0035],
        [0.0155]], requires_grad=True)
fc1.bias 		 Parameter containing:
tensor([0., 0.], requires_grad=True)

For more details, see: Pytorch中,只导入部分模型参数的做法
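
As an aside, load_state_dict also accepts strict=False, which silently ignores missing and unexpected keys, so the manual update() step above can be skipped. A minimal sketch of this variant (same Net1/Net2 and checkpoint path as above):

net = Net2(category_size=2)
save_model = torch.load('./save/transfer.pth')
# keep only the first layer's parameters; keys absent from this dict
# (conv2, fc1) simply keep Net2's freshly initialized values
state_dict = {k: v for k, v in save_model.items() if k in {'conv1.weight', 'conv1.bias'}}
net.load_state_dict(state_dict, strict=False)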

1.2 Identical Network Architectures

The code is much the same as in Section 1.1:

import os

import torch
from torch.nn import Module
from torch import nn
from torch.nn import functional as F


class Net1(Module):
    def __init__(self, category_size):
        super(Net1, self).__init__()
        self.conv1 = nn.Conv2d(1, 1, 3, 1)
        self.conv2 = nn.Conv2d(1, 1, 1, 1)
        self.fc1 = nn.Linear(1, category_size)

        self._init_parameters()

    def forward(self, x):
        batch_size = x.size(0)
        x = self.conv1(x)
        x = self.conv2(x)
        x = F.avg_pool2d(x, (x.shape[2], x.shape[3]))
        x = x.view(batch_size, -1)
        x = self.fc1(x)
        return x

    def _init_parameters(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


class Net2(Module):
    def __init__(self, category_size):
        super(Net2, self).__init__()
        self.conv1 = nn.Conv2d(1, 1, 3, 1)
        self.conv2 = nn.Conv2d(1, 1, 1, 1)
        self.fc1 = nn.Linear(1, category_size)

        self._init_parameters()

    def forward(self, x):
        batch_size = x.size(0)
        x = self.conv1(x)
        x = self.conv2(x)
        x = F.avg_pool2d(x, (x.shape[2], x.shape[3]))
        x = x.view(batch_size, -1)
        x = self.fc1(x)
        return x

    def _init_parameters(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


if __name__ == '__main__':
    net = Net1(category_size=2)

    for name, param in net.named_parameters():
        print(name, '\t\t', param)

    os.makedirs('./save', exist_ok=True)  # make sure the save directory exists
    torch.save(net.state_dict(), './save/transfer.pth')

    # ==============A Transfer learning Example===========================
    print('# ==============Transfer===========================')

    net = Net2(category_size=2)

    save_model = torch.load('./save/transfer.pth')
    model_dict = net.state_dict()
    state_dict = {k: v for k, v in save_model.items() if k in {'conv1.weight', 'conv1.bias'}}
    model_dict.update(state_dict)
    net.load_state_dict(model_dict)

    for name, param in net.named_parameters():
        print(name, '\t\t', param)

Output:

conv1.weight 		 Parameter containing:
tensor([[[[ 0.2574, -0.6129, -0.0745],
          [-0.1786, -0.2601,  0.4561],
          [ 0.3343, -0.1011, -1.0915]]]], requires_grad=True)
conv1.bias 		 Parameter containing:
tensor([0.], requires_grad=True)
conv2.weight 		 Parameter containing:
tensor([[[[0.9942]]]], requires_grad=True)
conv2.bias 		 Parameter containing:
tensor([0.], requires_grad=True)
fc1.weight 		 Parameter containing:
tensor([[ 0.0039],
        [-0.0050]], requires_grad=True)
fc1.bias 		 Parameter containing:
tensor([0., 0.], requires_grad=True)
# ==============Transfer===========================
conv1.weight 		 Parameter containing:
tensor([[[[ 0.2574, -0.6129, -0.0745],
          [-0.1786, -0.2601,  0.4561],
          [ 0.3343, -0.1011, -1.0915]]]], requires_grad=True)
conv1.bias 		 Parameter containing:
tensor([0.], requires_grad=True)
conv2.weight 		 Parameter containing:
tensor([[[[-0.8057]]]], requires_grad=True)
conv2.bias 		 Parameter containing:
tensor([0.], requires_grad=True)
fc1.weight 		 Parameter containing:
tensor([[ 0.0099],
        [-0.0056]], requires_grad=True)
fc1.bias 		 Parameter containing:
tensor([0., 0.], requires_grad=True)
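
Incidentally, since Net1 and Net2 are structurally identical here, transferring all parameters would not require any filtering at all; the whole checkpoint can be loaded in one strict call:

net = Net2(category_size=2)
net.load_state_dict(torch.load('./save/transfer.pth'))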

2 Freezing the Transferred Parameters

2.1 Frozen Convolutional Layers Without BatchNormalization

I highly recommend this Zhihu answer: Pytorch 如何精确的冻结我想冻结的预训练模型的某一层,有什么命令吗? - 有糖吃可好的回答 - 知乎. The demo code is as follows.
First, create a utility file (util.py — the main code below imports it under that name):

from collections.abc import Iterable


def set_freeze_by_names(model, layer_names, freeze=True):
    if not isinstance(layer_names, Iterable):
        layer_names = [layer_names]
    for name, child in model.named_children():
        if name not in layer_names:
            continue
        for param in child.parameters():
            param.requires_grad = not freeze


def freeze_by_names(model, layer_names):
    set_freeze_by_names(model, layer_names, True)


def unfreeze_by_names(model, layer_names):
    set_freeze_by_names(model, layer_names, False)
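
To double-check which parameters are actually frozen, you can print the requires_grad flags directly. A small sketch (the Tiny module is hypothetical, just for illustration):

from torch import nn

import util

class Tiny(nn.Module):
    def __init__(self):
        super(Tiny, self).__init__()
        self.conv1 = nn.Conv2d(1, 1, 3, 1)
        self.fc1 = nn.Linear(1, 2)

net = Tiny()
util.freeze_by_names(net, ['conv1'])
for name, param in net.named_parameters():
    print(name, param.requires_grad)  # conv1.* -> False, fc1.* -> True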

Then write the main code. This time the first two layers' parameters are transferred:

import os

import numpy as np
import torch
from torch.nn import Module
from torch import nn
from torch.nn import functional as F


class Net1(Module):
    def __init__(self, category_size):
        super(Net1, self).__init__()
        self.conv1 = nn.Conv2d(1, 1, 3, 1)
        self.conv2 = nn.Conv2d(1, 1, 1, 1)
        self.fc1 = nn.Linear(1, category_size)

        self._init_parameters()

    def forward(self, x):
        batch_size = x.size(0)
        x = self.conv1(x)
        x = self.conv2(x)
        x = F.avg_pool2d(x, (x.shape[2], x.shape[3]))
        x = x.view(batch_size, -1)
        x = self.fc1(x)
        return x

    def _init_parameters(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


class Net2(Module):
    def __init__(self, category_size):
        super(Net2, self).__init__()
        self.conv1 = nn.Conv2d(1, 1, 3, 1)
        self.conv2 = nn.Conv2d(1, 1, 1, 1)
        self.fc1 = nn.Linear(1, category_size)

        self._init_parameters()

    def forward(self, x):
        batch_size = x.size(0)
        x = self.conv1(x)
        x = self.conv2(x)
        x = F.avg_pool2d(x, (x.shape[2], x.shape[3]))
        x = F.adaptive_avg_pool2d(x, output_size=(1, 1))  # extra op that makes Net2's structure differ from Net1's
        x = x.view(batch_size, -1)
        x = self.fc1(x)
        return x

    def _init_parameters(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


if __name__ == '__main__':
    net = Net1(category_size=2)

    for name, param in net.named_parameters():
        print(name, '\t\t', param)

    os.makedirs('./save', exist_ok=True)  # make sure the save directory exists
    torch.save(net.state_dict(), './save/transfer.pth')

    # ==============Transfer===========================
    print('# ==============Transfer===========================')

    net = Net2(category_size=2)

    save_model = torch.load('./save/transfer.pth')
    model_dict = net.state_dict()
    state_dict = {k: v for k, v in save_model.items() if
                  k in {'conv1.weight', 'conv1.bias', 'conv2.weight', 'conv2.bias'}}
    model_dict.update(state_dict)
    net.load_state_dict(model_dict)

    for name, param in net.named_parameters():
        print(name, '\t\t', param)

    # ==============Freeze===========================
    print('# ==============Freeze===========================')

    import util

    util.freeze_by_names(net, ['conv1', 'conv2'])

    for name, param in net.named_parameters():
        print(name, '\t\t', param)

    # ============Train the model==========================
    optimizer = torch.optim.SGD(net.parameters(), lr=0.05)
    loss_func = nn.CrossEntropyLoss()

    batch_size = 1
    x = np.ones((batch_size, 1, 3, 3))
    x = torch.Tensor(x)
    y = [1]
    y = torch.Tensor(y).long()
    hat_y = net(x)
    print("===========Results: ", hat_y)

    net.train()
    loss = loss_func(hat_y, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # ==============Freeze After Learning===========================
    print('# ==============Freeze After Learning===========================')

    for name, param in net.named_parameters():
        print(name, '\t\t', param)

    net.eval()
    print("============Results after learning: ", net(x))

Output:

conv1.weight 		 Parameter containing:
tensor([[[[ 1.6932,  0.5651,  0.4273],
          [ 0.4963, -0.7085,  0.2292],
          [-0.1459,  0.1176,  0.2800]]]], requires_grad=True)
conv1.bias 		 Parameter containing:
tensor([0.], requires_grad=True)
conv2.weight 		 Parameter containing:
tensor([[[[0.0634]]]], requires_grad=True)
conv2.bias 		 Parameter containing:
tensor([0.], requires_grad=True)
fc1.weight 		 Parameter containing:
tensor([[ 0.0168],
        [-0.0051]], requires_grad=True)
fc1.bias 		 Parameter containing:
tensor([0., 0.], requires_grad=True)
# ==============Transfer===========================
conv1.weight 		 Parameter containing:
tensor([[[[ 1.6932,  0.5651,  0.4273],
          [ 0.4963, -0.7085,  0.2292],
          [-0.1459,  0.1176,  0.2800]]]], requires_grad=True)
conv1.bias 		 Parameter containing:
tensor([0.], requires_grad=True)
conv2.weight 		 Parameter containing:
tensor([[[[0.0634]]]], requires_grad=True)
conv2.bias 		 Parameter containing:
tensor([0.], requires_grad=True)
fc1.weight 		 Parameter containing:
tensor([[0.0211],
        [0.0058]], requires_grad=True)
fc1.bias 		 Parameter containing:
tensor([0., 0.], requires_grad=True)
# ==============Freeze===========================
conv1.weight 		 Parameter containing:
tensor([[[[ 1.6932,  0.5651,  0.4273],
          [ 0.4963, -0.7085,  0.2292],
          [-0.1459,  0.1176,  0.2800]]]])
conv1.bias 		 Parameter containing:
tensor([0.])
conv2.weight 		 Parameter containing:
tensor([[[[0.0634]]]])
conv2.bias 		 Parameter containing:
tensor([0.])
fc1.weight 		 Parameter containing:
tensor([[0.0211],
        [0.0058]], requires_grad=True)
fc1.bias 		 Parameter containing:
tensor([0., 0.], requires_grad=True)
===========Results:  tensor([[0.0040, 0.0011]], grad_fn=<AddmmBackward>)
# ==============Freeze After Learning===========================
conv1.weight 		 Parameter containing:
tensor([[[[ 1.6932,  0.5651,  0.4273],
          [ 0.4963, -0.7085,  0.2292],
          [-0.1459,  0.1176,  0.2800]]]])
conv1.bias 		 Parameter containing:
tensor([0.])
conv2.weight 		 Parameter containing:
tensor([[[[0.0634]]]])
conv2.bias 		 Parameter containing:
tensor([0.])
fc1.weight 		 Parameter containing:
tensor([[0.0164],
        [0.0105]], requires_grad=True)
fc1.bias 		 Parameter containing:
tensor([-0.0250,  0.0250], requires_grad=True)
============Results after learning:  tensor([[-0.0220,  0.0270]], grad_fn=<AddmmBackward>)

As shown above, the parameters of the first two layers did not participate in training. Initially the model leaned toward class 0; after training this was corrected to class 1.
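
As a side note, a common idiom is to hand the optimizer only the trainable parameters, so the frozen ones are never even considered (not strictly required here, since SGD skips parameters whose grad is None):

optimizer = torch.optim.SGD(
    filter(lambda p: p.requires_grad, net.parameters()), lr=0.05)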

2.2 Frozen Convolutional Layers With BatchNormalization

Note: if the frozen CNN layers contain BatchNorm (BN) layers, extra steps are needed. Even after freezing a BN layer's learnable weight and bias, its running mean and running variance still get updated during training. This puzzled me for a long time: I first trained a CNN-1, froze and transferred its first few layers to a CNN-2 for training, and finally wrote a CNN-3 that outputs the results of both CNN-1 and CNN-2, only to find that CNN-3's output did not match CNN-2's.
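
A minimal standalone sketch (not from the original experiment) that illustrates the problem: even with requires_grad set to False, a BN layer in train mode still updates its running statistics on every forward pass.

import torch
from torch import nn

bn = nn.BatchNorm2d(1)
for p in bn.parameters():
    p.requires_grad = False  # freeze the learnable weight and bias

print(bn.running_mean)       # tensor([0.])
bn.train()
bn(torch.randn(4, 1, 8, 8))  # one forward pass in train mode
print(bn.running_mean)       # no longer zero: the running stats were still updated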

For a solution, see this article: pytorch固定BN层参数

Here is an example (overriding the parent class's train and eval methods):

class Demo(nn.Module):
    def __init__(self, categories_size):
        super(Demo, self).__init__()  # 3 * 224 * 224
        ...  # the network definition (including self.bn1) is elided here

    def train(self, mode=True):
        super().train(mode)
        # put the frozen BN layer back into eval mode so its running stats stay fixed
        self.bn1.eval()
        return self

    def eval(self):
        # nn.Module.eval() simply calls train(False), which already hits the
        # override above, so this override is optional
        return super().eval()
During training, remember to call net.train(); with the override above, this still keeps the BN layer disabled (in eval mode).
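
A quick sanity check (a sketch, assuming the elided __init__ defines self.bn1 as an nn.BatchNorm2d):

net = Demo(categories_size=2)
net.train()
print(net.training)      # True
print(net.bn1.training)  # False: the frozen BN layer stays in eval mode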

References

Pytorch中,只导入部分模型参数的做法
pytorch 打印网络参数
Pytorch 如何精确的冻结我想冻结的预训练模型的某一层,有什么命令吗? - 有糖吃可好的回答 - 知乎
pytorch固定BN层参数
