[PyTorch Notes] 03 -- Models

Ways to Define a Model

  • Three ways to define a model
How each definition style takes its submodules:
Sequential: the forward pass simply chains the layers in order; it accepts an ordered dict of submodules or a sequence of submodules as arguments
ModuleList: takes a list of submodules as its argument
ModuleDict: takes a dict of submodules as its argument
#Sequential
#Simple; well suited to quickly validating an idea and to small models
import torch.nn as nn
import collections
## Direct arrangement
net = nn.Sequential(
	nn.Linear(784,256),
	nn.ReLU(),
	nn.Linear(256,10),
)
## Using an ordered dict (OrderedDict)
net2 = nn.Sequential(collections.OrderedDict([
	('fc1',nn.Linear(784,256)),
	('relu1',nn.ReLU()),
	('fc2',nn.Linear(256,10))
]))
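One convenience of the OrderedDict form: each submodule gets a name, so layers can be fetched by attribute (a quick check):
print(net2.fc1)# Linear(in_features=784, out_features=256, bias=True)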
## Usage inside an nn.Module
class Net(nn.Module):
	def __init__(self):
		super(Net,self).__init__()
		self.module = nn.Sequential(
			nn.Linear(784,256),
			nn.ReLU(),
			nn.Linear(256,10)
		)
	def forward(self,x):
		x = self.module(x)
		return x
#Flexible; very convenient when the same layer has to be repeated (see the list-comprehension sketch after the usage class below)
#ModuleList
net = nn.ModuleList([nn.Linear(784,256),nn.ReLU()])
net.append(nn.Linear(256,10))# append works like a Python list; entries can also be indexed
## Usage
class model(nn.Module):
	def __init__(self,net):
		super(model,self).__init__()
		self.modulelist = net
		...
	def forward(self,x):
		for layer in self.modulelist:
			x = layer(x)
		return x
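As noted above, ModuleList pairs well with list comprehensions when the same layer is repeated; a minimal sketch (the layer sizes and depth are arbitrary assumptions):
## Hypothetical stack of five identical blocks
repeated = nn.ModuleList([nn.Linear(256,256) for _ in range(5)])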
#ModuleDict
net = nn.ModuleDict({
	'linear':nn.Linear(784,256),
	'act':nn.ReLU(),
})
net['output'] = nn.Linear(256,10)# add a new layer by key assignment
## Usage is similar to ModuleList, except that layers are fetched by key (see the sketch below)
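A minimal sketch of such a wrapper, assuming the three keys defined above (note that ModuleDict, like ModuleList, does not define forward by itself):
class DictModel(nn.Module):# hypothetical wrapper around the ModuleDict above
	def __init__(self, net):
		super(DictModel, self).__init__()
		self.moduledict = net
	def forward(self, x):
		for key in ['linear', 'act', 'output']:# apply layers in an explicit key order
			x = self.moduledict[key](x)
		return x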

Building Complex Networks Quickly from Model Blocks

Taking the U-Net model as an example.
The idea: design each stage as its own block, then assemble the blocks into the full network (parts first, whole second).

import torch
import torch.nn as nn
import torch.nn.functional as F
class DoubleConv(nn.Module):# two successive convolutions
    """(convolution => [BN] => ReLU) * 2"""
    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)

class Down(nn.Module):# downsampling block
    """Downscaling with maxpool then double conv"""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)

class Up(nn.Module):# upsampling block
    """Upscaling then double conv"""

    def __init__(self, in_channels, out_channels, bilinear=False):
        super().__init__()
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
        else:
            self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        # inputs are NCHW; pad x1 so its spatial size matches x2
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]
        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)
class OutConv(nn.Module):# output 1x1 convolution
    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)
    def forward(self, x):
        return self.conv(x)
class UNet(nn.Module):# assemble the blocks into the full network
    def __init__(self, n_channels, n_classes, bilinear=False):
        super(UNet, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear

        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        factor = 2 if bilinear else 1
        self.down4 = Down(512, 1024 // factor)
        self.up1 = Up(1024, 512 // factor, bilinear)
        self.up2 = Up(512, 256 // factor, bilinear)
        self.up3 = Up(256, 128 // factor, bilinear)
        self.up4 = Up(128, 64, bilinear)
        self.outc = OutConv(64, n_classes)
    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        logits = self.outc(x)
        return logits
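A minimal smoke test of the assembled network (the input size is an assumption; any H and W divisible by 16 work, since the encoder downsamples four times):
unet = UNet(n_channels=3, n_classes=2)
x = torch.randn(1, 3, 256, 256)# hypothetical single RGB image
print(unet(x).shape)# torch.Size([1, 2, 256, 256]): per-pixel class logits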

Modifying, Saving, and Loading Models

Modification

  • Goal: modify the structure of an existing model
import torchvision.models as models
net = models.resnet50()
print(net)# inspect the structure to find the layer to replace (here: fc, 2048 -> 1000)
from collections import OrderedDict
classifier = nn.Sequential(OrderedDict([('fc1', nn.Linear(2048, 128)),
                          ('relu1', nn.ReLU()), 
                          ('dropout1',nn.Dropout(0.5)),
                          ('fc2', nn.Linear(128, 10)),
                          ('output', nn.Softmax(dim=1))
                          ]))
    
net.fc = classifier# swap in the new 10-class head
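A quick check that the replacement took effect (dummy batch; shapes assume the default resnet50 backbone):
import torch
x = torch.randn(2, 3, 224, 224)# hypothetical dummy batch
print(net(x).shape)# torch.Size([2, 10]): the new head maps 2048 -> 128 -> 10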
#Modification 1: adding an extra input
## Treat the original model up to the insertion point as a single unit, then in forward wire together the unchanged part, the extra input, and the layers that follow
class Model(nn.Module):
    def __init__(self, net):
        super(Model, self).__init__()
        self.net = net
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc_add = nn.Linear(1001, 10, bias=True)
        self.output = nn.Softmax(dim=1)
        
    def forward(self, x, add_variable):# add_variable is the extra input
        x = self.net(x)
        x = torch.cat((self.dropout(self.relu(x)), add_variable.unsqueeze(1)),1)
        x = self.fc_add(x)
        x = self.output(x)
        return x
import torchvision.models as models
net = models.resnet50()
model = Model(net).cuda()
outputs = model(inputs, add_var)
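inputs and add_var above are placeholders; a minimal sketch of suitable dummy tensors (shapes are assumptions), confirming that unsqueeze(1) turns the (4,) extra input into (4, 1) so the concatenation yields the 1001 features fc_add expects:
inputs = torch.randn(4, 3, 224, 224).cuda()# hypothetical batch of 4 images
add_var = torch.randn(4).cuda()# one extra scalar per sample
outputs = model(inputs, add_var)
print(outputs.shape)# torch.Size([4, 10])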
#Modification 2: adding an extra output
## useful for observing what an intermediate layer produces
class Model(nn.Module):
    def __init__(self, net):
        super(Model, self).__init__()
        self.net = net
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(1000, 10, bias=True)
        self.output = nn.Softmax(dim=1)
        
    def forward(self, x):# no extra input is needed here
        x1000 = self.net(x)
        x10 = self.dropout(self.relu(x1000))
        x10 = self.fc1(x10)
        x10 = self.output(x10)
        return x10, x1000# also return the intermediate 1000-d output
import torchvision.models as models
net = models.resnet50()
model = Model(net).cuda()
out10, out1000 = model(inputs)
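Here too inputs is a placeholder; with a dummy batch like the one sketched above, both outputs can be inspected directly, which is the point of the extra output:
print(out10.shape)# torch.Size([4, 10]): final class scores
print(out1000.shape)# torch.Size([4, 1000]): intermediate resnet50 output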

Saving & Loading

  • PyTorch models are commonly stored as pkl, pt, or pth files
  • A PyTorch model consists of two parts: the model structure and the weights
    • Model: a class inheriting from nn.Module
    • Weights: a dict (keys are layer names, values are weight tensors)
    • You can save both together, or save only the weights
	import os
	import torch
	from torchvision import models
	model = models.resnet152(pretrained=True)# use a pretrained model

	os.environ['CUDA_VISIBLE_DEVICES'] = '0'# list several ids for multiple GPUs
	model.cuda()# move to a single GPU
	model = torch.nn.DataParallel(model).cuda()# or wrap for multiple GPUs
	# Save and load the whole model
	torch.save(model, save_dir)
	loaded_model = torch.load(save_dir)
	# Save and load only the weights
	torch.save(model.state_dict(), save_dir)# note: state_dict() must be called, not passed as a method
	loaded_dict = torch.load(save_dir)
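save_dir above is a placeholder path; a minimal round trip with an assumed filename (a single-GPU model, so no "module." prefix complications arise):
	save_dir = './resnet152.pth'# hypothetical save path
	model = models.resnet152(pretrained=True)
	torch.save(model.state_dict(), save_dir)# save weights only
	loaded_model = models.resnet152()
	loaded_model.load_state_dict(torch.load(save_dir))# the idiomatic way to restore weights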
  • Differences between single-GPU and multi-GPU storage

    • Layer-name difference
      Every parameter name in a multi-GPU (DataParallel) model carries an extra "module." prefix compared with the single-GPU model, as the sketch below shows
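    A minimal check of this difference (hypothetical; it also runs without a GPU):
    import torch.nn as nn
    from torchvision import models
    single = models.resnet152()
    multi = nn.DataParallel(models.resnet152())
    print(list(single.state_dict().keys())[0])# 'conv1.weight'
    print(list(multi.state_dict().keys())[0])# 'module.conv1.weight'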
    • Single-GPU save + single-GPU load
    import os
    import torch
    from torchvision import models
    os.environ['CUDA_VISIBLE_DEVICES']='0'
    # Save + load the whole model
    torch.save(model,save_dir)
    loaded_model = torch.load(save_dir)
    loaded_model.cuda()
    # Save + load the weights
    torch.save(model.state_dict(),save_dir)
    loaded_dict = torch.load(save_dir)
    loaded_model = models.resnet152()# the model structure must be defined first
    loaded_model.load_state_dict(loaded_dict)# load the retrieved weights into the model
    
    • Single-GPU save + multi-GPU load (straightforward; a minimal sketch follows)
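    A minimal sketch of this case, assuming the weights were saved from a single-GPU model:
    os.environ['CUDA_VISIBLE_DEVICES'] = '1,2'   # replace with the GPU ids you want to use
    loaded_dict = torch.load(save_dir)
    loaded_model = models.resnet152()# the model structure must be defined first
    loaded_model.load_state_dict(loaded_dict)# keys carry no "module." prefix yet
    loaded_model = nn.DataParallel(loaded_model).cuda()# wrapping adds the prefix automatically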

    • Multi-GPU save + single-GPU load (when loading on a single GPU, the "module." prefix left by the multi-GPU model must be removed)

    ## The key point is removing the "module." prefix from parameter names so the definitions match
    os.environ['CUDA_VISIBLE_DEVICES'] = '1,2'   # replace with the GPU ids you want to use
    model = models.resnet152(pretrained=True)
    model = nn.DataParallel(model).cuda()# multi-GPU wrapping
    
    # Save + load the whole model (simple: just take the contents of the module attribute)
    torch.save(model, save_dir)# save
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'   # replace with the GPU id you want to use
    loaded_model = torch.load(save_dir)
    loaded_model = loaded_model.module# the unwrapping happens here
    # Save + load the weights
    torch.save(model.state_dict(), save_dir)# saving is unchanged
    
    ## Option 1: still load as a multi-GPU model, just with a single visible device id
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'   # replace with the GPU id you want to use
    loaded_dict = torch.load(save_dir)
    loaded_model = models.resnet152()   # the model structure must be defined first
    loaded_model = nn.DataParallel(loaded_model).cuda()# wrap so the "module." keys match
    loaded_model.load_state_dict(loaded_dict)
    ## Option 2: walk the state dict and strip "module."
    from collections import OrderedDict
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'   # replace with the GPU id you want to use
    loaded_dict = torch.load(save_dir)
    new_state_dict = OrderedDict()
    for k, v in loaded_dict.items():
        name = k[7:] # "module." is 7 characters, so slicing from index 7 removes the prefix
        new_state_dict[name] = v # keep each value under its stripped key
    loaded_model = models.resnet152()   # the model structure must be defined first
    loaded_model.load_state_dict(new_state_dict)
    loaded_model = loaded_model.cuda()
    ## Option 3: strip "module." with a replace over the keys
    loaded_model = models.resnet152()    
    loaded_dict = torch.load(save_dir)
    loaded_model.load_state_dict({k.replace('module.', ''): v for k, v in loaded_dict.items()})
    
    
    • Multi-GPU save + multi-GPU load
      • Main issue: device-id matching; the GPU ids in use must match the ids recorded at save time
    # Loading the whole model and then applying nn.DataParallel
    ## may fail: the device ids baked into the saved model can disagree with the current environment
    # Loading the whole model without applying nn.DataParallel
    ## may run, but only on part of the available GPUs
    # Loading only the weights and then applying nn.DataParallel (recommended)
    import os
    import torch
    import torch.nn as nn
    from torchvision import models
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2'   # replace with the GPU ids you want to use
    model = models.resnet152(pretrained=True)
    model = nn.DataParallel(model).cuda()
    
    # Save + load the weights
    torch.save(model.state_dict(), save_dir)
    loaded_dict = torch.load(save_dir)
    loaded_model = models.resnet152()   # the model structure must be defined first
    loaded_model = nn.DataParallel(loaded_model).cuda()
    loaded_model.load_state_dict(loaded_dict)
    # Load the whole model
    loaded_whole_model = torch.load(save_dir)# a DataParallel-wrapped model comes back
    loaded_model = models.resnet152()   # the model structure must be defined first
    loaded_model = nn.DataParallel(loaded_model).cuda()
    loaded_model.load_state_dict(loaded_whole_model.state_dict())# keys already carry "module."
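    Whichever route is used, a quick sanity check (a hypothetical verification) that the weights survived the round trip:
    for (k1, v1), (k2, v2) in zip(model.state_dict().items(),
                                  loaded_model.state_dict().items()):
        assert k1 == k2 and torch.equal(v1.cpu(), v2.cpu()), k1# every tensor should match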
    

References

Differences and usage of the model definition styles
Introduction to the U-Net project
