pytorch2维模型转换为3维模型,并使用2维模型参数初始化3维模型——以resnext为例


最近在整理视频理解相关领域的算法,看了I3D的文章,觉得使用2D模型的预训练初始化3D网络的模型参数是一个非常重要的步骤(提点),因此写一篇博客进行记录,希望对大家有帮助。


2D网络结构转换到3D网络结构

3D网络相比较于2D网络,主要区别是数据多了一个时间维度T:从 BCHW 变为 BCTHW。
因此将2D网络中的2D结构转换为3D结构,注意要设计好3D的卷积核、stride、padding等参数,主要有:

nn.BatchNorm2d        -> nn.BatchNorm3d
nn.Conv2d             -> nn.Conv3d
nn.AdaptiveAvgPool2d  -> nn.AdaptiveAvgPool3d

Inflate 初始化3D网络的参数

在之前的博客基础上进行了修改(https://blog.csdn.net/Abo_luo/article/details/117304751?spm=1001.2014.3001.5502)

def transfer_model(pretrained_file, model):
    """Load weights from *pretrained_file* into *model* after reconciling them.

    transfer_state_dict drops/adapts incompatible parameters before the merge,
    so load_state_dict receives a fully compatible state dict.
    """
    source_state = torch.load(pretrained_file)
    target_state = model.state_dict()
    # Reconcile pretrained parameters with the target model before merging.
    compatible = transfer_state_dict(source_state, target_state)
    target_state.update(compatible)
    model.load_state_dict(target_state)
    return model

def transfer_state_dict(pretrained_dict, model_dict):
    """Transfer 2D pretrained weights into a 3D model dict, pairing by order.

    Parameters are matched positionally (zip over both dicts), so the 2D and
    3D models must register their parameters in the same order. Tensors with
    identical shapes are copied directly; 4-D conv kernels are "inflated" to
    5-D by replicating along the new temporal axis and dividing by its length,
    which keeps the layer's summed response unchanged (I3D-style init).

    Args:
        pretrained_dict: state_dict of the pretrained 2D model.
        model_dict: state_dict of the target 3D model (mutated in place).

    Returns:
        The updated model_dict.
    """
    loaded = 0
    for (k1, v1), (k2, v2) in zip(pretrained_dict.items(), model_dict.items()):
        if v1.shape == v2.shape:
            # Shapes already agree (BN/bias/linear params): copy as-is.
            model_dict[k2] = v1
            loaded += 1
        elif v1.dim() == 4 and v2.dim() == 5:
            # Inflate a 2D conv kernel (O, I, H, W) to 3D (O, I, T, H, W):
            # repeat along the new temporal dim, rescale so the response
            # summed over T equals the original 2D response.
            temp_dim = v2.shape[2]
            model_dict[k2] = torch.unsqueeze(v1, 2).repeat(1, 1, temp_dim, 1, 1) / temp_dim
            loaded += 1
        else:
            # Incompatible ranks (e.g. classifier head with a different
            # num_class): keep the 3D model's random init instead of crashing.
            print('%s形状不匹配' % k2)
    print('模型总结构数%d个,成功加载参数%d个'%(len(model_dict),loaded))
    return model_dict

完整结构主要分为2步骤

1、找到2d网络的代码,并修改为3d的代码
2、找到对应的2d网络权重(下面代码中用的是timm库提供好的预训练权重,但是通常要模型适配,可以修改上面的transfer_state_dict函数),并迁移到3d网络中

2维resnext网络结构

import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm
# ResNeXt "32x4d" template hyper-parameters.
CARDINALITY = 32  # number of groups in the grouped 3x3 convolution
DEPTH = 4         # base channels per group (scaled with out_channels)
BASEWIDTH = 64    # reference width used to scale DEPTH per stage

def transfer_model(pretrained_file, model):
    """Load weights from *pretrained_file* into *model*, keeping only the
    parameters whose shapes match, and report how many were transferred."""
    source_state = torch.load(pretrained_file)
    target_state = model.state_dict()
    # Drop pretrained entries that do not fit the target model.
    kept = transfer_state_dict(source_state, target_state)
    print('成功加载参数%d个' % len(kept))
    target_state.update(kept)
    model.load_state_dict(target_state)
    return model

def transfer_state_dict(pretrained_dict, model_dict):
    """Pair both state dicts positionally and keep only pretrained tensors
    whose shapes match the target model's tensors (keyed by the target name).
    """
    matched = {}
    pairs = zip(pretrained_dict.items(), model_dict.items())
    for (src_key, src_val), (dst_key, dst_val) in pairs:
        if src_val.shape != dst_val.shape:
            print('%s形状不匹配' % dst_key)
            continue
        matched[dst_key] = src_val
    return matched

class ResNextBottleNeckC(nn.Module):
    """ResNeXt bottleneck block (2D): 1x1 reduce -> 3x3 grouped conv with
    CARDINALITY groups -> 1x1 expand to 4*out_channels, plus a shortcut."""

    def __init__(self, in_channels, out_channels, stride):
        super().__init__()
        groups = CARDINALITY
        # Channels per group, scaled with the stage width.
        per_group = int(DEPTH * out_channels / BASEWIDTH)
        width = groups * per_group
        self.split_transforms = nn.Sequential(
            nn.Conv2d(in_channels, width, kernel_size=1, groups=1, bias=False),
            nn.BatchNorm2d(width),
            nn.ReLU(inplace=True),
            nn.Conv2d(width, width, kernel_size=3, stride=stride, groups=groups, padding=1, bias=False),
            nn.BatchNorm2d(width),
            nn.ReLU(inplace=True),
            nn.Conv2d(width, out_channels * 4, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_channels * 4),
        )

        # Projection shortcut only when the residual shape differs from the input.
        if stride != 1 or in_channels != out_channels * 4:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * 4, stride=stride, kernel_size=1, bias=False),
                nn.BatchNorm2d(out_channels * 4)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        residual = self.split_transforms(x)
        return F.relu(residual + self.shortcut(x))

class ResNext(nn.Module):

    def __init__(self, block, num_blocks, class_names=100):
        super().__init__()
        self.in_channels = 64

        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, 7, stride=1, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True)
        )

        self.conv2 = self._make_layer(block, num_blocks[0], 64, 1)
        self.conv3 = self._make_layer(block, num_blocks[1], 128, 2)
        self.conv4 = self._make_layer(block, num_blocks[2], 256, 2)
        self.conv5 = self._make_layer(block, num_blocks[3], 512, 2)
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * 4, class_names)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.avg(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

    def _make_layer(self, block, num_block, out_channels, stride):
        strides = [stride] + [1] * (num_block - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels * 4

        return nn.Sequential(*layers)

def resnext50(num_class: int) -> "ResNext":
    """Return a ResNeXt-50 (32x4d) network with *num_class* output classes.
    """
    return ResNext(ResNextBottleNeckC, [3, 4, 6, 3],class_names=num_class)

def resnext101(num_class=100):
    """Return a ResNeXt-101 (32x4d) network.

    Args:
        num_class: number of classifier outputs. Defaults to 100, matching
            the ResNext constructor's default, so existing no-arg callers
            are unaffected.
    """
    return ResNext(ResNextBottleNeckC, [3, 4, 23, 3], class_names=num_class)

def resnext152(num_class=100):
    """Return a ResNeXt-152 (32x4d) network.

    (The original docstring said "resnext101" — this builds the 152-layer
    variant, [3, 4, 36, 3] blocks.)

    Args:
        num_class: number of classifier outputs. Defaults to 100, matching
            the ResNext constructor's default, so existing no-arg callers
            are unaffected.
    """
    return ResNext(ResNextBottleNeckC, [3, 4, 36, 3], class_names=num_class)


if __name__ == '__main__':
    # Use timm's pretrained ResNeXt weights as the 2D source checkpoint.
    model_names = timm.list_models(pretrained=True)
    # The message promises a count ("数量") — print len(), not the whole list.
    print("支持的预训练模型数量:%s" % len(model_names))
    model = timm.create_model('resnext50d_32x4d', pretrained=True)
    torch.save(model.state_dict(), './resnext50.pth')

    # Build our own 2D ResNeXt and transplant the timm weights into it.
    model = resnext50(num_class=2)
    model = transfer_model("./resnext50.pth", model)
    # Save the merged weights so the 3D script can load them without
    # re-doing any key matching.
    torch.save(model.state_dict(), './resnext50_after_timm.pth')

    # Smoke test: one forward pass on a random batch.
    input = torch.randn(2, 3, 224, 224)
    output = model(input)
    print(output.shape)

3维resnext网络结构

import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm

# ResNeXt "32x4d" template hyper-parameters (same as the 2D model, so the
# parameter shapes line up for weight inflation).
CARDINALITY = 32  # number of groups in the grouped 3x3x3 convolution
DEPTH = 4         # base channels per group (scaled with out_channels)
BASEWIDTH = 64    # reference width used to scale DEPTH per stage

def transfer_model(pretrained_file, model):
    """Load weights from *pretrained_file* into the 3D *model*.

    transfer_state_dict copies shape-matching tensors and inflates 2D conv
    kernels to 3D before the merge.
    """
    source_state = torch.load(pretrained_file)
    target_state = model.state_dict()
    # Adapt pretrained (2D) parameters to the 3D model before merging.
    adapted = transfer_state_dict(source_state, target_state)
    target_state.update(adapted)
    model.load_state_dict(target_state)
    return model

def transfer_state_dict(pretrained_dict, model_dict):
    """Transfer 2D pretrained weights into a 3D model dict, pairing by order.

    Parameters are matched positionally (zip over both dicts), so the 2D and
    3D models must register their parameters in the same order. Tensors with
    identical shapes are copied directly; 4-D conv kernels are "inflated" to
    5-D by replicating along the new temporal axis and dividing by its length,
    which keeps the layer's summed response unchanged (I3D-style init).

    Args:
        pretrained_dict: state_dict of the pretrained 2D model.
        model_dict: state_dict of the target 3D model (mutated in place).

    Returns:
        The updated model_dict.
    """
    loaded = 0
    for (k1, v1), (k2, v2) in zip(pretrained_dict.items(), model_dict.items()):
        if v1.shape == v2.shape:
            # Shapes already agree (BN/bias/linear params): copy as-is.
            model_dict[k2] = v1
            loaded += 1
        elif v1.dim() == 4 and v2.dim() == 5:
            # Inflate a 2D conv kernel (O, I, H, W) to 3D (O, I, T, H, W):
            # repeat along the new temporal dim, rescale so the response
            # summed over T equals the original 2D response.
            temp_dim = v2.shape[2]
            model_dict[k2] = torch.unsqueeze(v1, 2).repeat(1, 1, temp_dim, 1, 1) / temp_dim
            loaded += 1
        else:
            # Incompatible ranks (e.g. classifier head with a different
            # num_class): keep the 3D model's random init instead of crashing.
            print('%s形状不匹配' % k2)
    print('模型总结构数%d个,成功加载参数%d个'%(len(model_dict),loaded))
    return model_dict

class ResNextBottleNeckC(nn.Module):
    """ResNeXt bottleneck block (3D): 1x1x1 reduce -> 3x3x3 grouped conv with
    CARDINALITY groups -> 1x1x1 expand to 4*out_channels, plus a shortcut.

    Spatial stride only — the temporal dimension is never downsampled inside
    a block (stride is (1, s, s)).
    """

    def __init__(self, in_channels, out_channels, stride):
        super().__init__()

        groups = CARDINALITY
        # Channels per group, scaled with the stage width.
        per_group = int(DEPTH * out_channels / BASEWIDTH)
        width = groups * per_group
        self.split_transforms = nn.Sequential(
            nn.Conv3d(in_channels, width, kernel_size=1, groups=1, bias=False),
            nn.BatchNorm3d(width),
            nn.ReLU(inplace=True),
            nn.Conv3d(width, width, kernel_size=3, stride=(1,stride,stride), groups=groups, padding=1, bias=False),
            nn.BatchNorm3d(width),
            nn.ReLU(inplace=True),
            nn.Conv3d(width, out_channels * 4, kernel_size=1, bias=False),
            nn.BatchNorm3d(out_channels * 4),
        )

        # Projection shortcut only when the residual shape differs from the input.
        if stride != 1 or in_channels != out_channels * 4:
            self.shortcut = nn.Sequential(
                nn.Conv3d(in_channels, out_channels * 4, stride=(1,stride,stride), kernel_size=1, bias=False),
                nn.BatchNorm3d(out_channels * 4)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        residual = self.split_transforms(x)
        return F.relu(residual + self.shortcut(x))

class ResNext(nn.Module):

    def __init__(self, block, num_blocks, class_names=100):
        super().__init__()
        self.in_channels = 64

        self.conv1 = nn.Sequential(
            nn.Conv3d(3, 64, 7, stride=(2,2,2), padding=3, bias=False),
            nn.BatchNorm3d(64),
            nn.ReLU(inplace=True)
        )

        self.conv2 = self._make_layer(block, num_blocks[0], 64, 1)
        self.conv3 = self._make_layer(block, num_blocks[1], 128, 2)
        self.conv4 = self._make_layer(block, num_blocks[2], 256, 2)
        self.conv5 = self._make_layer(block, num_blocks[3], 512, 2)
        self.avg = nn.AdaptiveAvgPool3d((1, 1, 1))
        self.fc = nn.Linear(512 * 4, class_names)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.avg(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

    def _make_layer(self, block, num_block, out_channels, stride):
        strides = [stride] + [1] * (num_block - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels * 4

        return nn.Sequential(*layers)

def resnext50(num_class: int) -> "ResNext":
    """Return a 3D ResNeXt-50 (32x4d) network with *num_class* output classes.
    """
    return ResNext(ResNextBottleNeckC, [3, 4, 6, 3],class_names=num_class)

def resnext101(num_class=100):
    """Return a 3D ResNeXt-101 (32x4d) network.

    Args:
        num_class: number of classifier outputs. Defaults to 100, matching
            the ResNext constructor's default, so existing no-arg callers
            are unaffected.
    """
    return ResNext(ResNextBottleNeckC, [3, 4, 23, 3], class_names=num_class)

def resnext152(num_class=100):
    """Return a 3D ResNeXt-152 (32x4d) network.

    (The original docstring said "resnext101" — this builds the 152-layer
    variant, [3, 4, 36, 3] blocks.)

    Args:
        num_class: number of classifier outputs. Defaults to 100, matching
            the ResNext constructor's default, so existing no-arg callers
            are unaffected.
    """
    return ResNext(ResNextBottleNeckC, [3, 4, 36, 3], class_names=num_class)



if __name__ == '__main__':
    # Use the GPU when available; the original unconditional .cuda() call
    # crashes on CPU-only machines.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = resnext50(2).to(device)
    # Weights saved by the 2D script; keys line up positionally, so the 3D
    # model must use the same num_class as the 2D one (2 here).
    model = transfer_model("./resnext50_after_timm.pth", model)

    # Smoke test: batch of 2 clips, 10 frames each.
    input = torch.randn(2, 3, 10, 224, 224, device=device)
    output = model(input)
    print(output.shape)

总结

上述第一段程序中主要需要修改的是transfer_state_dict函数,第二段程序(3D)和第一段程序(2D)模型的num_class需要相等,因为在第二段程序中本来就存在网络参数形状不匹配,因此没有做相关判断,直接使用了key(顺序)进行迁移。模型权重链接:https://pan.baidu.com/s/1AH1yJP5DWrEL7hqQByVStQ?pwd=x9l2
提取码:x9l2

  • 1
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值