# encoding: utf-8
import torch
import torch.nn as nn
import torchvision
from torchsummary import summary
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')  # a torch.device, e.g. cuda:0 or cpu
I. Saving and loading models
1. Save only parameters: model weights, optimizer state, epoch count, etc.
torch.save(model.state_dict(), './model.pth')  # save only the weight parameters, not the model structure
weights_dict = torch.load('./model.pth', map_location=device)
# Rebuild the model, then load the stored parameters into it; strict=False allows a partial load
model = Model().to(device)
load_weights_dict = {k: v for k, v in weights_dict.items()
                     if model.state_dict()[k].numel() == v.numel()}  # keep only entries whose element counts match
model.load_state_dict(load_weights_dict, strict=False)
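With strict=False, load_state_dict also returns the keys it could not match, which is handy for checking what was actually loaded. A minimal sketch, continuing from the code above:
result = model.load_state_dict(load_weights_dict, strict=False)
print('missing keys:', result.missing_keys)        # keys in the model but absent from the file
print('unexpected keys:', result.unexpected_keys)  # keys in the file but absent from the model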
ckpt_path = 'mymodel.pth'  # avoid naming this 'dir', which shadows the Python built-in
state = {'net': model.state_dict(), 'optimizer': optimizer.state_dict(), 'epoch': epoch}
torch.save(state, ckpt_path)  # the checkpoint bundles model weights, optimizer state, and the epoch
checkpoint = torch.load(ckpt_path)
# Restore each piece individually
model.load_state_dict(checkpoint['net'])
optimizer.load_state_dict(checkpoint['optimizer'])
start_epoch = checkpoint['epoch'] + 1  # resume from the next epoch
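Putting the two halves together, a minimal resume-training sketch (train_one_epoch and num_epochs are hypothetical placeholders, not from the original):
for epoch in range(start_epoch, num_epochs):
    train_one_epoch(model, optimizer)  # hypothetical per-epoch training step
    torch.save({'net': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': epoch}, ckpt_path)  # checkpoint after every epoch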
2. Save both the model weights and the structure
torch.save(model, './model.pth')  # save the entire model object (structure + weights, via pickle)
model02 = torch.load('model.pth')  # no need to rebuild the model structure; just load it
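Because this approach pickles the whole object, the Model class definition must still be importable at load time. And just as with state_dict loading, map_location controls the target device; a minimal sketch for loading onto CPU:
model02 = torch.load('model.pth', map_location=torch.device('cpu'))
model02.eval()  # switch to inference mode before evaluating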
II. Loading pretrained classic models
.pt / .pth / .pkl: these do not differ in format; torch.save writes the same pickle-based file regardless, and only the file extension (a naming convention) differs.
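A quick sketch demonstrating the point (assumes a model instance as in section I): the same state_dict saved under either extension loads back identically.
torch.save(model.state_dict(), 'weights.pt')
torch.save(model.state_dict(), 'weights.pkl')
sd_pt = torch.load('weights.pt')
sd_pkl = torch.load('weights.pkl')
assert sd_pt.keys() == sd_pkl.keys()  # identical contents, different extensions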
import torchvision.models as models  # all the pretrained models live here
resnet18 = models.resnet18(pretrained=True)   # load the model structure together with pretrained parameters
resnet18 = models.resnet18(pretrained=False)  # load only the model structure, without pretrained weights
resnet18.load_state_dict(torch.load('resnet18.pth'))  # load pre-downloaded pretrained parameters into resnet18
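Note that newer torchvision releases (0.13+) deprecate the pretrained= flag in favor of weights= enums; a minimal sketch, assuming such a version is installed:
from torchvision.models import resnet18, ResNet18_Weights
model_pre = resnet18(weights=ResNet18_Weights.DEFAULT)  # with pretrained weights
model_raw = resnet18(weights=None)                      # structure only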
III. Testing model.state_dict() and printing the model
There are two common ways to print a model:
# define model
class Model(nn.Module):
    def __init__(self, num_classes=10):
        super(Model, self).__init__()
        # 1. Convolution: 1 input channel (e.g. a grayscale image), 16 output channels, 3x3 kernel
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1)
        # 2. Pooling: 2x2 max pooling
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # 3. Fully connected layers, assuming 28x28 input images
        #    After pooling: 16 channels of 14x14 each (pooling halves the spatial size)
        self.fc1 = nn.Linear(16 * 14 * 14, 128)  # fully connected: 16 * 14 * 14 in, 128 out
        self.fc2 = nn.Linear(128, num_classes)   # final fully connected layer, outputs the class count

    def forward(self, x):
        # 1. Convolution + activation + pooling
        x = self.pool(F.relu(self.conv1(x)))  # [batch_size, 1, 28, 28] -> [batch_size, 16, 14, 14]
        # 2. Flatten the tensor
        x = x.view(-1, 16 * 14 * 14)  # [batch_size, 16, 14, 14] -> [batch_size, 16 * 14 * 14]
        # 3. Fully connected + activation
        x = F.relu(self.fc1(x))  # [batch_size, 128]
        # 4. Output layer (no activation here; apply Softmax or CrossEntropyLoss afterwards)
        x = self.fc2(x)  # [batch_size, num_classes]
        return x
def main():
    # Initialize model
    model = Model(num_classes=10)
    # Initialize optimizer
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    # Check the model's output on random input data
    input_data = torch.randn(1, 1, 28, 28)  # assume a single-channel 28x28 image
    output = model(input_data)
    print("output.shape = ", output.shape)  # should be [1, 10], i.e. scores for 10 classes
    print('############################################################')
    # Method 1: print the network directly
    print('model ')
    print(model)
    print('############################################################')
    print('Model.state_dict:')
    for param_tensor in model.state_dict():
        # print the key/value pairs of the state dict
        print(param_tensor, '\t', model.state_dict()[param_tensor].size())
    print('############################################################')
    print("Optimizer's state_dict: ")
    for var_name in optimizer.state_dict():
        print(var_name, '\t', optimizer.state_dict()[var_name])
    # Method 2: move the model to the device, then print a layer-by-layer summary
    print('############################################################')
    t = model.to(device)
    summary(model=t, input_size=(1, 28, 28))

if __name__ == '__main__':
    main()
Output:
'''
output.shape = torch.Size([1, 10])
############################################################
model
Model(
(conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(fc1): Linear(in_features=3136, out_features=128, bias=True)
(fc2): Linear(in_features=128, out_features=10, bias=True)
)
############################################################
Model.state_dict:
conv1.weight torch.Size([16, 1, 3, 3])
conv1.bias torch.Size([16])
fc1.weight torch.Size([128, 3136])
fc1.bias torch.Size([128])
fc2.weight torch.Size([10, 128])
fc2.bias torch.Size([10])
############################################################
Optimizer's state_dict:
state {}
param_groups [{'lr': 0.001, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'params': [0, 1, 2, 3, 4, 5]}]
############################################################
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 16, 28, 28] 160
MaxPool2d-2 [-1, 16, 14, 14] 0
Linear-3 [-1, 128] 401,536
Linear-4 [-1, 10] 1,290
================================================================
Total params: 402,986
Trainable params: 402,986
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.12
Params size (MB): 1.54
Estimated Total Size (MB): 1.66
----------------------------------------------------------------
'''
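As a sanity check on the printout: fc1's in_features of 3136 is exactly the flattened feature-map size, 16 * 14 * 14 = 3136, and its parameter count 401,536 = 3136 * 128 + 128 (weights plus biases); together with conv1 (160) and fc2 (1,290) this accounts for the total of 402,986 parameters.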