Quick Notes: Preface
Basic usage of PyTorch (an example of training a pretrained model)
I. Why write this note?
To record the basic usage of PyTorch through a complete example of fine-tuning pretrained models for image classification.
II. Note content
1. Code
train.py:
# Using the CIFAR10 dataset as an example, walk through a complete model-training workflow for a classification task
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
from torchvision import models
import time
from PIL import Image
# Select the training device
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Preprocessing: resize, convert to tensor, and normalize
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),  # convert the PIL image to a tensor
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ]
)
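# Note: the mean/std above are the ImageNet statistics that torchvision's pretrained
# models were trained with, so fine-tuning inputs should be normalized the same way.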
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
# Load a single image for a quick sanity-check prediction
img = Image.open('./images/horse.jpg')  # load the image as (height, width, channels)
img = transform(img)                    # preprocess to [C, H, W]
img = torch.unsqueeze(img, dim=0)       # add a batch dimension -> [N, C, H, W]
img = img.to(device)
# Prepare the datasets
train_data = torchvision.datasets.CIFAR10(root='../dataset', train=True, transform=transform,
                                          download=True)
test_data = torchvision.datasets.CIFAR10(root="../dataset", train=False, transform=transform,
                                         download=True)
# Get the dataset sizes
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练数据集及的长度为: {}".format(train_data_size))
print("测试数据集及的长度为: {}".format(test_data_size))
# Use a DataLoader to load the data in batches (shuffle the training set each epoch)
train_dataloader = DataLoader(train_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=64)
# Inspect one batch of data
for step, (b_x, b_y) in enumerate(train_dataloader):
    if step > 0:
        break
    print('Batch labels:', b_y.numpy())  # convert the tensor to a numpy array
    print(b_x.shape)
    print(b_y.shape)
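# With Resize((224, 224)) and batch_size=64, the printed shapes should be
# b_x: torch.Size([64, 3, 224, 224]) and b_y: torch.Size([64]).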
# Build the network model
# (1) A hand-built LeNet-style network (note: it expects 32x32 inputs, so it does not
#     match the Resize((224, 224)) transform above)
# class Model(nn.Module):  # inherits from the nn.Module parent class
#     def __init__(self):
#         super().__init__()  # super() runs the parent-class initialization
#         self.conv1 = nn.Conv2d(3, 16, 5)   # first conv layer: 3 input channels (RGB), 16 kernels of size 5x5
#         self.pool1 = nn.MaxPool2d(2, 2)    # pooling (downsampling) layer: 2x2 window, stride 2; only shrinks H and W
#         self.conv2 = nn.Conv2d(16, 32, 5)  # second conv layer: 16 input channels, 32 kernels of size 5x5
#         self.pool2 = nn.MaxPool2d(2, 2)    # second pooling layer
#         self.fc1 = nn.Linear(32*5*5, 120)  # first fully connected layer; the feature map must be flattened into a vector first
#         self.fc2 = nn.Linear(120, 84)      # second fully connected layer; 120 is the previous layer's output size
#         self.fc3 = nn.Linear(84, 10)       # third fully connected layer; 84 in, 10 out (one per class)
#
#     def forward(self, x):  # forward pass; x is the input batch
#         x = F.relu(self.conv1(x))  # input(3, 32, 32) -> output(16, 28, 28)
#         x = self.pool1(x)          # output(16, 14, 14): max pooling halves H and W, depth stays 16
#         x = F.relu(self.conv2(x))  # output(32, 10, 10): N = (W - F + 2P)/S + 1 => (14 - 5 + 0)/1 + 1 = 10
#         x = self.pool2(x)          # output(32, 5, 5): the second pooling halves H and W again
#         x = x.view(-1, 32*5*5)     # flatten to 32*5*5 features per sample; -1 infers the batch dimension
#         x = F.relu(self.fc1(x))    # output(120)
#         x = F.relu(self.fc2(x))    # output(84)
#         x = self.fc3(x)            # output(10)
#         return x
# (2) Fine-tune a pretrained VGG16 (freeze the feature-extraction layers)
# vgg16 = models.vgg16(weights=models.VGG16_Weights.DEFAULT)  # on older torchvision use pretrained=True
# print(vgg16)
# vgg = vgg16.features            # take VGG16's feature-extraction layers
# for param in vgg.parameters():  # freeze them so they are not updated during training
#     param.requires_grad_(False)
#
# class Model(nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.vgg = vgg
#         self.classifier = nn.Sequential(
#             nn.Linear(25088, 512),  # 512 * 7 * 7 = 25088 features for a 224x224 input
#             nn.ReLU(),
#             nn.Dropout(p=0.5),
#             nn.Linear(512, 256),
#             nn.ReLU(),
#             nn.Dropout(p=0.5),
#             nn.Linear(256, 10)
#         )
#
#     def forward(self, x):
#         x = self.vgg(x)
#         x = x.view(x.size(0), -1)  # flatten the feature maps
#         x = self.classifier(x)
#         return x
# model = Model()
# model = model.to(device)
# print(model)
# # x = torch.rand([64, 3, 224, 224])  # quick shape check
# # x = x.to(device)
# # output = model(x)
# # print(output.shape)
#
# (3) Fine-tune GoogLeNet (without freezing any parameters)
model = models.googlenet(weights=models.GoogLeNet_Weights.DEFAULT)  # on older torchvision use pretrained=True
model.fc = nn.Linear(in_features=1024, out_features=10, bias=True)  # replace the 1000-class ImageNet head with 10 classes
model = model.to(device)
print(model)
# x = torch.rand([64, 3, 224, 224])  # quick shape check
# x = x.to(device)
# output = model(x)
# print(output.shape)
# # (4) Fine-tune ResNet152 (without freezing any parameters)
# model = models.resnet152(weights=models.ResNet152_Weights.DEFAULT)  # on older torchvision use pretrained=True
# model.fc = nn.Linear(in_features=2048, out_features=10, bias=True)
# model = model.to(device)
# print(model)
# x = torch.rand([64, 3, 224, 224])  # quick shape check
# x = x.to(device)
# output = model(x)
# print(output.shape)
# Create the loss function
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)
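# Note: nn.CrossEntropyLoss takes raw logits plus integer class targets and applies
# log-softmax internally, so the model itself does not need a softmax layer.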
# Optimizer
learning_rate = 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
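# (Optional) A sketch of learning-rate decay with StepLR; the step_size and gamma here are illustrative:
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
# scheduler.step() would then be called once at the end of every epoch.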
# A few bookkeeping variables for the training loop
total_train_step = 0
total_test_step = 0
epoch = 10
writer = SummaryWriter('logs_train_test')
start_time = time.time()
# Training loop
for i in range(epoch):
    print("------ Epoch {} ------".format(i + 1))
    # Training phase
    for data in train_dataloader:
        imgs, targets = data
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = model(imgs)
        loss = loss_fn(outputs, targets)
        optimizer.zero_grad()  # clear the old gradients before the optimization step
        loss.backward()        # backpropagate to compute the gradient of every parameter
        optimizer.step()       # update the parameters
        total_train_step += 1  # one training step finished
        # Log only every 100 steps to cut down on noise and make the output easier to scan
        if total_train_step % 100 == 0:
            end_time = time.time()
            print("Elapsed time: {}".format(end_time - start_time))
            print("Step: {}, Loss: {}".format(total_train_step, loss.item()))
            writer.add_scalar('train_loss', loss.item(), total_train_step)
    # Evaluation on the test set
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            imgs = imgs.to(device)
            targets = targets.to(device)
            outputs = model(imgs)
            loss = loss_fn(outputs, targets)
            total_test_loss += loss.item()
            accuracy = (outputs.argmax(1) == targets).sum().item()
            total_accuracy += accuracy
        # output_little = model(img)  # predict the single image loaded above
        # output_little = output_little.to('cpu')
        # predict = torch.max(output_little, dim=1)[1].numpy()[0]  # index of the largest score; dim 0 is the batch, dim 1 holds the class scores
        # print(classes[predict])
    print("Loss over the whole test set: {}".format(total_test_loss))
    print("Accuracy on the whole test set: {}".format(total_accuracy / test_data_size))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy / test_data_size, total_test_step)
    total_test_step += 1  # advance the test counter after each evaluation
    torch.save(model, './model_googlenet/model_{}.pth'.format(i))  # save the whole model object
    print('Model saved')
writer.close()
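Note that torch.save(model, ...) pickles the whole model object, which ties the checkpoint to the exact class definition and torchvision version used for training. A more portable alternative (shown as a minimal sketch; the file name is illustrative) is to save only the state_dict and rebuild the architecture before loading:
# Save only the learned parameters:
torch.save(model.state_dict(), './model_googlenet/model_state.pth')
# To restore, rebuild the same architecture first, then load the weights into it;
# transform_input=True matches the configuration of the pretrained GoogLeNet.
model = models.googlenet(weights=None, aux_logits=False, transform_input=True, init_weights=True)
model.fc = nn.Linear(in_features=1024, out_features=10, bias=True)
model.load_state_dict(torch.load('./model_googlenet/model_state.pth', map_location=device))
model = model.to(device)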
test.py:
import torch
import torchvision.transforms as transforms
from PIL import Image
import torchvision.models as models
import torch.nn as nn
# Select the device
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Preprocessing: resize, convert to tensor, and normalize (same as in train.py)
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),  # convert the PIL image to a tensor
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ]
)
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
model = torch.load('./model_googlenet/model_9.pth', map_location=device)  # load the whole saved model (on PyTorch >= 2.6 pass weights_only=False)
# print(model)
img = Image.open('images/frog.jpg')  # load the image as (height, width, channels)
img = transform(img)                 # preprocess to [C, H, W]
img = torch.unsqueeze(img, dim=0)    # add a batch dimension -> [N, C, H, W]
img = img.to(device)
model.eval()  # put the network in inference mode; otherwise layers such as Batch Normalization
              # and Dropout keep their training-time behavior and the outputs will not be meaningful
outputs = model(img)
outputs = outputs.to('cpu')
predict = torch.max(outputs, dim=1)[1].numpy()[0]  # index of the largest score; dim 0 is the batch, dim 1 holds the class scores
print(classes[predict])
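To also see how confident the prediction is, the raw scores can be converted to probabilities with a softmax. A minimal optional sketch continuing from the code above:
probs = torch.softmax(outputs, dim=1)        # convert logits to class probabilities
top_prob, top_idx = torch.max(probs, dim=1)  # highest probability and its class index
print('{} ({:.2%})'.format(classes[top_idx.item()], top_prob.item()))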
2. Results
The image used for test.py is frog.jpg.
The test output is as follows: