模型定义的方式
- 模型定义的三种方式
模型定义方式 | 对应操作 |
---|---|
Sequential | 前向计算方式为简单串联各个层的计算时使用;接收一个子模块的有序字典(OrderedDict)或一系列子模块作为参数 |
ModuleList | 以列表为参数 |
ModuleDict | 以字典为参数 |
#Sequential
#简单,适合用于快速验证想法,小型模型适合
import torch.nn as nn
import collections
## Option 1: pass the layers positionally, in the order they should run.
net = nn.Sequential(
    nn.Linear(784, 256),  # the comma after this layer was missing (SyntaxError)
    nn.ReLU(),
    nn.Linear(256, 10),
)
## Option 2: name every layer through an ordered dict (OrderedDict).
## Keyword arguments keep their insertion order, so the layers are
## registered as fc1 -> relu1 -> fc2.
net2 = nn.Sequential(
    collections.OrderedDict(
        fc1=nn.Linear(784, 256),
        relu1=nn.ReLU(),
        fc2=nn.Linear(256, 10),
    )
)
## Usage: wrap the Sequential container inside a custom nn.Module.
class Net(nn.Module):
    """Two-layer perceptron (784 -> 256 -> 10) built from ``nn.Sequential``."""

    def __init__(self):
        super(Net, self).__init__()
        self.module = nn.Sequential(
            nn.Linear(784, 256),
            nn.ReLU(),
            # nn.Linear() was called without its required sizes; 256 -> 10
            # matches the other Sequential examples in these notes.
            nn.Linear(256, 10),
        )

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the result."""
        return self.module(x)
# ModuleList: flexible; convenient when the same kind of layer has to be
# repeated many times.
# nn.ReLU must be instantiated: ModuleList only accepts Module instances,
# not Module classes.
net = nn.ModuleList([nn.Linear(784, 256), nn.ReLU()])
net.append(nn.Linear(256, 10))  # entries can also be indexed, e.g. net[0]
## Usage: the stored layers are chained explicitly inside forward().
class model(nn.Module):
    """Apply the layers held in a module container one after another.

    Args:
        net: iterable container of layers (for example an ``nn.ModuleList``);
            it is stored as ``self.modulelist``.
    """

    def __init__(self, net):
        super(model, self).__init__()
        self.modulelist = net
        # Any further initialisation was elided ("...") in the original notes.

    def forward(self, x):
        """Feed ``x`` through every stored layer in order and return the result."""
        for layer in self.modulelist:
            x = layer(x)
        return x
##
# ModuleDict: layers are stored and looked up by name.
net = nn.ModuleDict(dict(linear=nn.Linear(784, 256), act=nn.ReLU()))
net.add_module('output', nn.Linear(256, 10))  # add one more named layer
## 使用 与ModuleList相似
利用模型块快速搭建复杂网络
以U-Net模型为例
具体思路为:每层单独设计为一个模块,然后再合在一起使用(分总思路)
import torch
import torch.nn as nn
import torch.nn.functional as F
class DoubleConv(nn.Module):  # two consecutive convolutions
    """(convolution => [BN] => ReLU) * 2

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels produced by the second convolution.
        mid_channels: channels between the two convolutions; falls back to
            ``out_channels`` when not given.
    """

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        # 3x3 kernels with padding=1 keep the spatial size unchanged.
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Apply both conv/BN/ReLU stages to ``x``."""
        return self.double_conv(x)
class Down(nn.Module):  # down-sampling stage
    """Downscaling with maxpool then double conv

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by the DoubleConv.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels),
        )

    def forward(self, x):
        """Pool ``x`` with a 2x2 window, then apply the double convolution."""
        return self.maxpool_conv(x)
class Up(nn.Module):  # up-sampling stage
    """Upscaling then double conv

    Args:
        in_channels: channels of the concatenated (skip + upsampled) tensor
            that is fed to the DoubleConv.
        out_channels: channels produced by the DoubleConv.
        bilinear: if True, upsample with ``nn.Upsample`` (bilinear); otherwise
            use a transposed convolution that halves the channel count.
    """

    def __init__(self, in_channels, out_channels, bilinear=False):
        super().__init__()
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
        else:
            self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        """Upsample ``x1``, pad it to the size of ``x2``, concatenate and convolve.

        ``x1`` is the tensor being upsampled and ``x2`` is the skip-connection
        tensor it is concatenated with.
        """
        x1 = self.up(x1)
        # Tensors are indexed as 4-D here: dim 2 is used as height (Y) and
        # dim 3 as width (X).
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]
        # F.pad takes (left, right, top, bottom) for the last two dims; any odd
        # remainder goes to the right/bottom side.
        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        x = torch.cat([x2, x1], dim=1)  # concatenate along the channel dim
        return self.conv(x)
class OutConv(nn.Module):  # output convolution
    """1x1 convolution mapping ``in_channels`` to ``out_channels``."""

    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        """Apply the 1x1 convolution to ``x``."""
        return self.conv(x)
class UNet(nn.Module):  # assemble the building blocks
    """U-Net assembled from DoubleConv, Down, Up and OutConv.

    Args:
        n_channels: channels of the input image.
        n_classes: channels of the output logits.
        bilinear: forwarded to every ``Up`` block; when True the deepest
            encoder stage and the first three decoder stages use half the
            channel count (``factor = 2``).
    """

    def __init__(self, n_channels, n_classes, bilinear=False):
        super(UNet, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear

        # Encoder
        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        factor = 2 if bilinear else 1
        self.down4 = Down(512, 1024 // factor)
        # Decoder
        self.up1 = Up(1024, 512 // factor, bilinear)
        self.up2 = Up(512, 256 // factor, bilinear)
        self.up3 = Up(256, 128 // factor, bilinear)
        self.up4 = Up(128, 64, bilinear)
        self.outc = OutConv(64, n_classes)

    def forward(self, x):
        """Encode ``x``, decode with skip connections and return the logits."""
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        # Each Up block receives the matching encoder output as its skip input.
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        logits = self.outc(x)
        return logits
修改、保存与读取模型
修改
- 目的:在已有模型上对结构进行修改
# Build a ResNet-50 from torchvision and print its structure so the layer
# that is going to be replaced can be located.
import torchvision.models as models
net = models.resnet50()
print(net)
# OrderedDict is used below to give the replacement layers names.
from collections import OrderedDict
# Replacement head: 2048 -> 128 -> 10 followed by a softmax over dim 1.
# Keyword arguments keep their insertion order, so the layers are registered
# as fc1, relu1, dropout1, fc2, output.
classifier = nn.Sequential(
    OrderedDict(
        fc1=nn.Linear(2048, 128),
        relu1=nn.ReLU(),
        dropout1=nn.Dropout(0.5),
        fc2=nn.Linear(128, 10),
        output=nn.Softmax(dim=1),
    )
)
# Swap the network's `fc` attribute for the new head.
net.fc = classifier
# Modification 1: add an extra external input.
## The original model is kept as one unit up to the point where the extra
## input is added; forward() then defines how the unchanged part, the extra
## input and the following layers are connected.
class Model(nn.Module):
    """Wrap ``net`` and concatenate one extra value per sample before the final layer.

    Args:
        net: backbone module. ``fc_add`` expects 1001 input features, i.e.
            presumably a 1000-feature backbone output plus the one extra
            value; confirm against the backbone used.
    """

    def __init__(self, net):
        super(Model, self).__init__()
        self.net = net
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc_add = nn.Linear(1001, 10, bias=True)
        self.output = nn.Softmax(dim=1)

    def forward(self, x, add_variable):  # add_variable is the extra input
        """Return softmax scores computed from ``net(x)`` and ``add_variable``.

        ``add_variable`` gets a new dim 1 via ``unsqueeze(1)`` and is
        concatenated to the backbone features along dim 1 (assumes it has
        one value per sample; confirm with the caller).
        """
        x = self.net(x)
        features = self.dropout(self.relu(x))
        x = torch.cat((features, add_variable.unsqueeze(1)), 1)
        x = self.fc_add(x)
        x = self.output(x)
        return x
# Usage: wrap a ResNet-50 backbone and move the wrapper to the GPU.
import torchvision.models as models
net = models.resnet50()
model = Model(net).cuda()
# NOTE(review): `inputs` and `add_var` are not defined in this snippet --
# presumably a batch of images and one extra value per sample; confirm.
outputs = model(inputs, add_var)
# Modification 2: add an extra output.
## Handy for inspecting what an intermediate layer produces.
class Model(nn.Module):
    """Wrap ``net`` and return both the final scores and the raw backbone output.

    Args:
        net: backbone module; ``fc1`` expects it to output 1000 features.
    """

    def __init__(self, net):
        super(Model, self).__init__()
        self.net = net
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(1000, 10, bias=True)
        self.output = nn.Softmax(dim=1)

    def forward(self, x, add_variable=None):
        """Return ``(x10, x1000)``: softmax scores and the backbone output.

        ``add_variable`` is not used by this variant. It is kept, now with a
        default, so existing two-argument calls still work.
        """
        x1000 = self.net(x)
        x10 = self.dropout(self.relu(x1000))
        x10 = self.fc1(x10)
        x10 = self.output(x10)
        return x10, x1000  # the second element is the extra output
# Usage: the wrapper returns two tensors, unpacked here as out10 / out1000.
import torchvision.models as models
net = models.resnet50()
model = Model(net).cuda()
# NOTE(review): `inputs` and `add_var` are not defined in this snippet; confirm
# where they come from.
out10, out1000 = model(inputs, add_var)
保存&读取
- PyTorch存储模型主要采用pkl、pt、pth三种格式
- 一个PyTorch模型主要包含两个部分:模型结构和权重
- 模型:继承nn.Module的类
- 权重:一个字典(key是层名,value是权重向量)
- 可以两者皆存,也可只存权重
import os
import torch
from torchvision import models
model = models.resnet152(pretrained=True)  # start from a pretrained model
os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # list several ids (e.g. '0,1') for multi-GPU
model.cuda()  # put the model on a single GPU
model = torch.nn.DataParallel(model).cuda()  # wrap it for multi-GPU use
# NOTE(review): `save_dir` is not defined in this snippet; it is assumed to be
# the checkpoint path -- confirm where it is set.
# Save / load the whole model
torch.save(model, save_dir)
loaded_model = torch.load(save_dir)
# Save / load only the weights
# state_dict is a method and must be called; without the parentheses the
# bound method is pickled instead of the weight dictionary.
torch.save(model.state_dict(), save_dir)
loaded_dict = torch.load(save_dir)
-
单卡和多卡存储的区别
- 层名区别
多卡并行的模型每层的名称比单卡模型多了一个“module.”前缀
- 单卡保存+单卡加载
import os
import torch
from torchvision import models

os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# NOTE(review): `model` and `save_dir` are not defined in this snippet; `model`
# is assumed to be a plain (non-DataParallel) model -- confirm.

# Save + load the whole model
torch.save(model, save_dir)
loaded_model = torch.load(save_dir)
loaded_model.cuda()

# Save + load the weights only
torch.save(model.state_dict(), save_dir)
loaded_dict = torch.load(save_dir)
loaded_model = models.resnet152()  # the model structure has to be defined first
# Assigning to `loaded_model.state_dict` only overwrites the method and never
# copies the weights; load_state_dict() does the actual loading.
loaded_model.load_state_dict(loaded_dict)
-
单卡保存+多卡加载 (不需多言)
-
多卡保存+单卡加载 (单卡加载时需要去除 多卡模型中的 module标识)
## Key point: strip the "module." prefix from the parameter names so the
## weights match a model that is not wrapped in DataParallel.
# NOTE(review): CUDA_VISIBLE_DEVICES is normally read when CUDA is first
# initialised, so changing it later in the same process may have no effect --
# confirm for your setup.
os.environ['CUDA_VISIBLE_DEVICES'] = '1,2'  # replace with the GPU ids you want to use
model = models.resnet152(pretrained=True)
model = nn.DataParallel(model).cuda()  # multi-GPU

# Save + load the whole model (simple: take the wrapped model from `.module`)
torch.save(model, save_dir)
os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # replace with the GPU id you want to use
loaded_model = torch.load(save_dir)
loaded_model = loaded_model.module  # unwrap DataParallel

# Save + load the weights only
torch.save(model.state_dict(), save_dir)  # saving is unchanged

## Option 1: still load through DataParallel, with a single device enabled
os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # replace with the GPU id you want to use
loaded_dict = torch.load(save_dir)
loaded_model = models.resnet152()  # the model structure has to be defined first
loaded_model = nn.DataParallel(loaded_model).cuda()  # keys keep the "module." prefix
# Assigning to `.state_dict` only overwrites the method; load_state_dict()
# actually copies the weights.
loaded_model.load_state_dict(loaded_dict)

## Option 2: walk the dictionary and strip "module."
from collections import OrderedDict
os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # replace with the GPU id you want to use
loaded_dict = torch.load(save_dir)
prefix = 'module.'  # 7 characters, at the very start of every key
new_state_dict = OrderedDict()
for k, v in loaded_dict.items():
    # Strip the prefix only when it is present; values are kept as they are.
    name = k[len(prefix):] if k.startswith(prefix) else k
    new_state_dict[name] = v
loaded_model = models.resnet152()  # the model structure has to be defined first
loaded_model.load_state_dict(new_state_dict)
loaded_model = loaded_model.cuda()

## Option 3: strip "module." with str.replace
loaded_model = models.resnet152()
loaded_dict = torch.load(save_dir)
loaded_model.load_state_dict({k.replace('module.', ''): v for k, v in loaded_dict.items()})
- 多卡保存+多卡加载
- 主要问题device id匹配问题,使用的GPU id要和保存信息匹配
# Three possible approaches:
#   1. load the whole model, then wrap it with nn.DataParallel again
#      (the original note here is truncated after "possibly")
#   2. load the whole model and do not wrap it with nn.DataParallel
#      (no remark was given in the original note)
#   3. load only the weights, then wrap with nn.DataParallel (recommended)
import torch
from torchvision import models

os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2'  # replace with the GPU ids you want to use
model = models.resnet152(pretrained=True)
model = nn.DataParallel(model).cuda()

# Save + load the weights only
torch.save(model.state_dict(), save_dir)
loaded_dict = torch.load(save_dir)
loaded_model = models.resnet152()  # the model structure has to be defined first
loaded_model = nn.DataParallel(loaded_model).cuda()
# Assigning to `.state_dict` only overwrites the method; load_state_dict()
# actually copies the weights.
loaded_model.load_state_dict(loaded_dict)

# Load the whole model
# NOTE(review): this part assumes `save_dir` holds a whole pickled
# DataParallel model (torch.save(model, save_dir)), not the state dict saved
# above -- confirm.
loaded_whole_model = torch.load(save_dir)
loaded_model = models.resnet152()  # the model structure has to be defined first
# Take the weights from the wrapped `.module` so the keys carry no "module."
# prefix and match the plain resnet152.
loaded_model.load_state_dict(loaded_whole_model.module.state_dict())
loaded_model = nn.DataParallel(loaded_model).cuda()
- 层名区别