Quick Notes: Preface
Basic usage of PyTorch (an example of training a pretrained model)
I. Why write this note?
To record the basic usage of PyTorch through a complete example of fine-tuning pretrained models for image classification.
II. Note content
1. Code
train.py:
# Using the CIFAR10 dataset as an example, walk through a complete model-training workflow for a classification task
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
from torchvision import models
import time
from PIL import Image
# Select the training device
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Preprocessing: resize, convert to tensor, and normalize
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),  # convert the PIL image to a tensor
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ]
)
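# Note: the mean/std above are the ImageNet statistics that torchvision's pretrained
# models were trained with, so fine-tuning inputs should be normalized the same way.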
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
# Load a single image for a quick sanity-check prediction
img = Image.open('./images/horse.jpg')  # load the image as (height, width, channels)
img = transform(img)                    # preprocess to [C, H, W]
img = torch.unsqueeze(img, dim=0)       # add a batch dimension -> [N, C, H, W]
img = img.to(device)
# Prepare the datasets
train_data = torchvision.datasets.CIFAR10(root='../dataset', train=True, transform=transform,
                                          download=True)
test_data = torchvision.datasets.CIFAR10(root="../dataset", train=False, transform=transform,
                                         download=True)
# Get the dataset sizes
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练数据集及的长度为: {}".format(train_data_size))
print("测试数据集及的长度为: {}".format(test_data_size))
# Use a DataLoader to load the data in batches (shuffle the training set each epoch)
train_dataloader = DataLoader(train_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=64)
# Inspect one batch of data
for step, (b_x, b_y) in enumerate(train_dataloader):
    if step > 0:
        break
    print('Batch labels:', b_y.numpy())  # convert the tensor to a numpy array
    print(b_x.shape)
    print(b_y.shape)
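# With Resize((224, 224)) and batch_size=64, the printed shapes should be
# b_x: torch.Size([64, 3, 224, 224]) and b_y: torch.Size([64]).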
# Build the network model
# (1) A hand-built LeNet-style network (note: it expects 32x32 inputs, so it does not
#     match the Resize((224, 224)) transform above)
# class Model(nn.Module):  # inherits from the nn.Module parent class
#     def __init__(self):
#         super().__init__()  # super() runs the parent-class initialization
#         self.conv1 = nn.Conv2d(3, 16, 5)   # first conv layer: 3 input channels (RGB), 16 kernels of size 5x5
#         self.pool1 = nn.MaxPool2d(2, 2)    # pooling (downsampling) layer: 2x2 window, stride 2; only shrinks H and W
#         self.conv2 = nn.Conv2d(16, 32, 5)  # second conv layer: 16 input channels, 32 kernels of size 5x5
#         self.pool2 = nn.MaxPool2d(2, 2)    # second pooling layer
#         self.fc1 = nn.Linear(32*5*5, 120)  # first fully connected layer; the feature map must be flattened into a vector first
#         self.fc2 = nn.Linear(120, 84)      # second fully connected layer; 120 is the previous layer's output size
#         self.fc3 = nn.Linear(84, 10)       # third fully connected layer; 84 in, 10 out (one per class)
#
#     def forward(self, x):  # forward pass; x is the input batch
#         x = F.relu(self.conv1(x))  # input(3, 32, 32) -> output(16, 28, 28)
#         x = self.pool1(x)          # output(16, 14, 14): max pooling halves H and W, depth stays 16
#         x = F.relu(self.conv2(x))  # output(32, 10, 10): N = (W - F + 2P)/S + 1 => (14 - 5 + 0)/1 + 1 = 10
#         x = self.pool2(x)          # output(32, 5, 5): the second pooling halves H and W again
#         x = x.view(-1, 32*5*5)     # flatten to 32*5*5 features per sample; -1 infers the batch dimension
#         x = F.relu(self.fc1(x))    # output(120)
#         x = F.relu(self.fc2(x))    # output(84)
#         x = self.fc3(x)            # output(10)
#         return x
# (2) Fine-tune a pretrained VGG16 (freeze the feature-extraction layers)
# vgg16 = models.vgg16(weights=models.VGG16_Weights.DEFAULT)  # on older torchvision use pretrained=True
# print(vgg16)
# vgg = vgg16.features            # take VGG16's feature-extraction layers
# for param in vgg.parameters():  # freeze them so they are not updated during training
#     param.requires_grad_(False)
#
# class Model(nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.vgg = vgg
#         self.classifier = nn.Sequential(
#             nn.Linear(25088, 512),  # 512 * 7 * 7 = 25088 features for a 224x224 input
#             nn.ReLU(),
#             nn.Dropout(p=0.5),
#             nn.Linear(512, 256),
#             nn.ReLU(),
#             nn.Dropout(p=0.5),
#             nn.Linear(256, 10)
#         )
#
#     def forward(self, x):
#         x = self.vgg(x)
#         x = x.view(x.size(0), -1)  # flatten the feature maps
#         x = self.classifier(x)
#         return x
# model = Model()
# model = model.to(device)
# print(model)
# # x = torch.rand([64, 3, 224, 224])  # quick shape check
# # x = x.to(device)
# # output = model(x)
# # print(output.shape)
#
# (3) Fine-tune GoogLeNet (without freezing any parameters)
model = models.googlenet(weights=models.GoogLeNet_Weights.DEFAULT)  # on older torchvision use pretrained=True
model.fc = nn.Linear(in_features=1024, out_features=10, bias=True)  # replace the 1000-class ImageNet head with 10 classes
model = model.to(device)
print(model)
# x = torch.rand([64, 3, 224, 224])  # quick shape check
# x = x.to(device)
# output = model(x)
# print(output.shape)
# # (4) Fine-tune ResNet152 (without freezing any parameters)
# model = models.resnet152(weights=models.ResNet152_Weights.DEFAULT)  # on older torchvision use pretrained=True
# model.fc = nn.Linear(in_features=2048, out_features=10, bias=True)
# model = model.to(device)
# print(model)
# x = torch.rand([64, 3, 224, 224])  # quick shape check
# x = x.to(device)
# output = model(x)
# print(output.shape)
# Create the loss function
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)
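# Note: nn.CrossEntropyLoss takes raw logits plus integer class targets and applies
# log-softmax internally, so the model itself does not need a softmax layer.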
# Optimizer
learning_rate = 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
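# (Optional) A sketch of learning-rate decay with StepLR; the step_size and gamma here are illustrative:
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
# scheduler.step() would then be called once at the end of every epoch.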
# A few bookkeeping variables for the training loop
total_train_step = 0
total_test_step = 0
epoch = 10
writer = SummaryWriter('logs_train_test')
start_time = time.time()
# Training loop
for i in range(epoch):
    print("------ Epoch {} ------".format(i + 1))
    # Training phase
    for data in train_dataloader:
        imgs, targets = data
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = model(imgs)
        loss = loss_fn(outputs, targets)
        optimizer.zero_grad()  # clear the old gradients before the optimization step
        loss.backward()        # backpropagate to compute the gradient of every parameter
        optimizer.step()       # update the parameters
        total_train_step += 1  # one training step finished
        # Log only every 100 steps to cut down on noise and make the output easier to scan
        if total_train_step % 100 == 0:
            end_time = time.time()
            print("Elapsed time: {}".format(end_time - start_time))
            print("Step: {}, Loss: {}".format(total_train_step, loss.item()))
            writer.add_scalar('train_loss', loss.item(), total_train_step)
    # Evaluation on the test set
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            imgs = imgs.to(device)
            targets = targets.to(device)
            outputs = model(imgs)
            loss = loss_fn(outputs, targets)
            total_test_loss += loss.item()
            accuracy = (outputs.argmax(1) == targets).sum().item()
            total_accuracy += accuracy
        # output_little = model(img)  # predict the single image loaded above
        # output_little = output_little.to('cpu')
        # predict = torch.max(output_little, dim=1)[1].numpy()[0]  # index of the largest score; dim 0 is the batch, dim 1 holds the class scores
        # print(classes[predict])
    print("Loss over the whole test set: {}".format(total_test_loss))
    print("Accuracy on the whole test set: {}".format(total_accuracy / test_data_size))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy / test_data_size, total_test_step)
    total_test_step += 1  # advance the test counter after each evaluation
    torch.save(model, './model_googlenet/model_{}.pth'.format(i))  # save the whole model object
    print('Model saved')
writer.close()
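Note that torch.save(model, ...) pickles the whole model object, which ties the checkpoint to the exact class definition and torchvision version used for training. A more portable alternative (shown as a minimal sketch; the file name is illustrative) is to save only the state_dict and rebuild the architecture before loading:
# Save only the learned parameters:
torch.save(model.state_dict(), './model_googlenet/model_state.pth')
# To restore, rebuild the same architecture first, then load the weights into it;
# transform_input=True matches the configuration of the pretrained GoogLeNet.
model = models.googlenet(weights=None, aux_logits=False, transform_input=True, init_weights=True)
model.fc = nn.Linear(in_features=1024, out_features=10, bias=True)
model.load_state_dict(torch.load('./model_googlenet/model_state.pth', map_location=device))
model = model.to(device)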
test.py:
import torch
import torchvision.transforms as transforms
from PIL import Image
import torchvision.models as models
import torch.nn as nn
# Select the device
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Preprocessing: resize, convert to tensor, and normalize (same as in train.py)
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),  # convert the PIL image to a tensor
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ]
)
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
model = torch.load('./model_googlenet/model_9.pth', map_location=device)  # load the whole saved model (on PyTorch >= 2.6 pass weights_only=False)
# print(model)
img = Image.open('images/frog.jpg')  # load the image as (height, width, channels)
img = transform(img)                 # preprocess to [C, H, W]
img = torch.unsqueeze(img, dim=0)    # add a batch dimension -> [N, C, H, W]
img = img.to(device)
model.eval()  # put the network in inference mode; otherwise layers such as Batch Normalization
              # and Dropout keep their training-time behavior and the outputs will not be meaningful
outputs = model(img)
outputs = outputs.to('cpu')
predict = torch.max(outputs, dim=1)[1].numpy()[0]  # index of the largest score; dim 0 is the batch, dim 1 holds the class scores
print(classes[predict])
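To also see how confident the prediction is, the raw scores can be converted to probabilities with a softmax. A minimal optional sketch continuing from the code above:
probs = torch.softmax(outputs, dim=1)        # convert logits to class probabilities
top_prob, top_idx = torch.max(probs, dim=1)  # highest probability and its class index
print('{} ({:.2%})'.format(classes[top_idx.item()], top_prob.item()))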
2. Results
The image used for test.py is frog.jpg.
The test output is as follows: