Getting Started with the PyTorch Framework

Study material:
Bilibili, 小土堆: 【PyTorch深度学习快速入门教程(绝对通俗易懂!)【小土堆】】https://www.bilibili.com/video/BV1hE411t7RN?vd_source=aa9d929b905492aca44f9e32ddf94a6e

These are not in-depth notes, just a record of some basic, entry-level usage and workflow so I can look things up later. If they happen to help you too, that makes me happy haha.

Ctrl+P: show the parameter types a call expects

Ctrl+/: comment out the selected code

Ctrl+B: jump to a package's source code

Basic utility functions

  • dir() lists what a package or module contains (its sub-packages, functions, etc.)

dir(torch.cuda)

  • help() shows the documentation of a specific function

help(torch.cuda.is_available)

Data

1.data

The raw data of each class.

2.dataset

from torch.utils.data import Dataset
from PIL import Image
import os

class MyData(Dataset):

    def __init__(self,root_dir,label_dir):
        self.root_dir = root_dir
        self.label_dir = label_dir
        self.path = os.path.join(self.root_dir,self.label_dir)
        self.img_path_list = os.listdir(self.path)
  • Get each sample of a given class together with its label
    # Override __getitem__ to return the image and its label
    def __getitem__(self, idx):
        img_name = self.img_path_list[idx]
        img_item_path = os.path.join(self.path, img_name)
        img = Image.open(img_item_path)
        label = self.label_dir
        return img, label


# Instantiate the Dataset class
root_dir = "dataset/train"
ants_label_dir = "ants"
ants_dataset = MyData(root_dir, ants_label_dir)
idx = 10

# Indexing the dataset calls __getitem__ and returns the image and its label
img, label = ants_dataset[idx]
img.show()
  • Get the total number of samples
    def __len__(self):
        return len(self.img_path_list)

# len() calls __len__ and returns the dataset length
ants_img_length = len(ants_dataset)
print(ants_img_length)
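
Two class-specific datasets built this way can be merged into one training set: Dataset supports the + operator, which returns a torch.utils.data.ConcatDataset. A minimal sketch, assuming a parallel "bees" folder exists under the same root_dir:

bees_label_dir = "bees"
bees_dataset = MyData(root_dir, bees_label_dir)

# Dataset.__add__ chains the two datasets into a ConcatDataset
train_dataset = ants_dataset + bees_dataset
print(len(train_dataset))  # len(ants_dataset) + len(bees_dataset)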
Dataset directory layouts
train
-A   // the folder name is the label
--imageA1
--imageA2
--...
-B
--imageB
--...
train   // label files and image files share the same base name
-A_images
--aaa
--bbb
--...

-B_labels
--aaa
--bbb
--...
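
For the second layout, where every image has a label file with the same base name, here is a minimal Dataset sketch of my own (assuming the label folder holds .txt files whose content is the label string; the img_dir/label_dir folder names are passed in by the caller):

class MyLabeledData(Dataset):

    def __init__(self, root_dir, img_dir, label_dir):
        self.img_path = os.path.join(root_dir, img_dir)
        self.label_path = os.path.join(root_dir, label_dir)
        self.img_name_list = os.listdir(self.img_path)

    def __getitem__(self, idx):
        img_name = self.img_name_list[idx]
        img = Image.open(os.path.join(self.img_path, img_name))
        # the label file shares the image's base name, e.g. aaa.jpg -> aaa.txt (assumed naming)
        label_name = os.path.splitext(img_name)[0] + ".txt"
        with open(os.path.join(self.label_path, label_name)) as f:
            label = f.read().strip()
        return img, label

    def __len__(self):
        return len(self.img_name_list)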

Data visualization with TensorBoard
from torch.utils.tensorboard import SummaryWriter
from PIL import Image
import numpy as np

# Initialization
writer = SummaryWriter("logs")

'''
The event files are written to the logs folder under the current path.
View them from the terminal with: tensorboard --logdir=logs [--port=6007]  (default port 6006)
'''

# Logging an image
image_path = "dataset2/train/ants_image/0013035.jpg"
img_PIL = Image.open(image_path)
img_array = np.array(img_PIL)

writer.add_image("train", img_array, 1, dataformats='HWC')
# logging another array under the same tag with global_step=2 adds a second step to the slider
'''
params:
	tag: title of the panel
	img_tensor: image data (tensor, numpy array, or string)
	global_step: step index (the x-axis)
	dataformats: one of 'HWC', 'CHW', 'HW'; check with print(img_array.shape)

'''


# Plotting scalars
for i in range(100):
    writer.add_scalar("y=2x", 3*i, i)
'''
params:
	tag: title of the chart
	scalar_value: the value to record (the y-axis)
	global_step: step index (the x-axis)

'''

# Plotting the model graph
writer.add_graph(model, input_to_model)
'''
params:
	model: the nn.Module to visualize
	input_to_model: an example input tensor that is traced through the model

'''
writer.close()

3.dataloader

A DataLoader takes samples out of a dataset and feeds them into the neural network.

How the samples are drawn and batched is controlled by the DataLoader's parameters.

'''
   torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=None, sampler=None, batch_sampler=None, num_workers=0, collate_fn=None, pin_memory=False, drop_last=False, timeout=0, worker_init_fn=None, multiprocessing_context=None, generator=None, *, prefetch_factor=None, persistent_workers=False, pin_memory_device='')

Args:
    dataset
    batch_size: how many samples are drawn per step
    shuffle: whether to reshuffle the data at every epoch
    num_workers: number of subprocesses used for data loading
    drop_last: when the dataset size is not divisible by batch_size, whether to drop the last incomplete batch

'''

import torchvision
from torch.utils.data import DataLoader

test_data = torchvision.datasets.CIFAR10("./dataset3", train=False, transform=torchvision.transforms.ToTensor(), download=True)
test_dataloader = DataLoader(dataset=test_data, batch_size=64, shuffle=True, num_workers=0, drop_last=True)

# Take just a single sample from test_data
img, target = test_data[0]
print(img.shape)
print(target)

# Each batch yielded by test_dataloader bundles 64 elements of test_data.
for data in test_dataloader:
    imgs, targets = data
    print(imgs.shape)
    print(targets)
    
'''
torch.Size([3, 32, 32])
3 

torch.Size([64, 3, 32, 32])
tensor([7, 9, 1, 1, 9, 4, 7, 2, 0, 6, 1, 6, 2, 9, 3, 5, 5, 8, 2, 5, 6, 4, 4, 5,8, 8, 0, 6, 0, 8, 4, 1, 1, 4, 6, 4, 7, 2, 4, 4, 4, 7, 2, 4, 3, 3, 3, 0, 9, 5, 2, 5, 6, 4, 7, 5, 9, 8, 6, 1, 3, 5, 2, 2])
'''    
 

Data loading in test_dataloader is lazy: although the DataLoader is configured with the dataset and loading parameters at construction time, the actual loading only happens when you iterate over test_dataloader.

# This can be seen as merely constructing and configuring a loader
test_dataloader = DataLoader(dataset=test_data, batch_size=64, shuffle=True, num_workers=0, drop_last=True)


writer = SummaryWriter("logs")
for epoch in range(2):
    step = 0
    # The actual loading only happens in this inner loop.
    # What each epoch contains depends on the shuffle parameter.
    for data in test_dataloader:
        imgs, targets = data
        writer.add_images("Epoch: {}".format(epoch), imgs, step)
        step = step + 1

writer.close()


TorchVision

1.Transforms

An image in a particular format is passed through some of the Class instances (tools) defined in transforms.py, which output the processed image we need.

from torchvision import transforms
'''
How to use a transform Class
    1. Create an instance of the Class to get a concrete tool
    2. Call the instance with an image as input; the output is the processed image
'''
ToTensor

The tensor data type wraps the extra attributes (gradients, device, etc.) that an object needs inside a neural network.

from PIL import Image
import cv2

image_path = "dataset2/train/ants_image/0013035.jpg"
img_PIL = Image.open(image_path)
img_array = cv2.imread(image_path)

# img PIL/img nparray-> ToTensor -> img tensor
tensor_trans = transforms.ToTensor()
img_tensor1 = tensor_trans(img_PIL)
img_tensor2 = tensor_trans(img_array)

Normalize

Normalizes a tensor image channel by channel.

# img tensor-> Normalize -> img norm tensor
'''
 @params:
    mean: per-channel means
    std: per-channel standard deviations
    the length of each list matches the number of channels
 @output[channel] = (input[channel] - mean[channel]) / std[channel]
'''
norm_trans = transforms.Normalize([0.5,0.5,0.5],[0.5,0.5,0.5])
img_norm = norm_trans(img_tensor1)
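
With mean = std = 0.5 for every channel, the formula maps values from [0, 1] to [-1, 1]; a quick sanity check on the tensors above:

print(img_tensor1[0][0][0])  # a value in [0, 1]
print(img_norm[0][0][0])     # (value - 0.5) / 0.5, now in [-1, 1]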

Resize

Rescales an image to a given size.

# img tensor/img PIL -> Resize -> resized img tensor/PIL
'''
 @init_params:
    (height, width): the size of the image after resizing
 @output: the resized img
'''
resize_trans = transforms.Resize((512,512))
img_resize = resize_trans(img_tensor1)

Compose

Chains several transform instances together.

'''
 @init_params:
    [transform1, transform2, ...]: a list of transform instances
 @output: the image is piped through the listed transforms in order to produce the desired result
'''
compose_trans = transforms.Compose([resize_trans,
                                   tensor_trans,
                                   norm_trans
                                   ])

image_path = "dataset2/train/ants_image/255434217_1b2b3fe0a4.jpg"
img_PIL = Image.open(image_path)
img_norm = compose_trans(img_PIL)
RandomCrop

Randomly crops an image to the given size.

'''
 @init_params:
    (height, width): the size of the cropped image
 @output: a randomly cropped img of that size
'''
crop_trans = transforms.RandomCrop((200, 200))
compose_trans = transforms.Compose([crop_trans,
                                     tensor_trans
                                     ])

for i in range(10):
    img_crop = compose_trans(img_PIL)
    writer.add_image("random_test", img_crop, i)

2.TorchVision Dataset

The torchvision package ships many public datasets, so a project can load them directly instead of downloading and organizing the files by hand.

import torchvision

dataset_transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
'''
dataset = torchvision.datasets.<DatasetName>(root=..., train=..., transform=..., download=..., ...)

'''
train_set = torchvision.datasets.CIFAR10(root="./dataset3", train=True, transform=dataset_transform, download=True)
test_set = torchvision.datasets.CIFAR10(root="./dataset3", train=False, transform=dataset_transform, download=True)

Neural network basics: torch.nn

1. Containers (the skeleton)

torch.nn.Module is the base class of all neural network modules; to use it, define a class that inherits from nn.Module.

import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    # initialize the network layers
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    # forward pass over the input data
    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))
        # input -> conv -> non-linearity -> conv -> non-linearity -> output

Example:

import torch
from torch import nn

class MyModel(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self,input):
        output = input+1.0
        return output

model = MyModel()
x = torch.tensor(1.0)
output = model(x)  # model(x) invokes nn.Module.__call__, which automatically calls forward
print(output)

torch.nn.Sequential stacks layers in order into a single model, which is more concise and convenient.

model = nn.Sequential(
          nn.Conv2d(1,20,5),
          nn.ReLU(),
          nn.Conv2d(20,64,5),
          nn.ReLU()
        )

Example:

import torch
from torch import nn
from torch.utils.tensorboard import SummaryWriter


class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.model1 = nn.Sequential(
            # padding = (kernel_size-1)/2
            nn.Conv2d(in_channels=3, out_channels=32,
                      kernel_size=5, stride=1, padding=2),
            # stride defaults to kernel_size (2)
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=32,
                                   kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64,
                                   kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Linear(1024, 64),
            nn.Linear(64, 10)
        )

    def forward(self,x):
        x = self.model1(x)
        return x


model = MyModel()
print(model)
input = torch.ones((64, 3, 32, 32))
output = model(input)
print(output.shape)

writer = SummaryWriter("logs")
writer.add_graph(model, input)
writer.close()
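
Why Linear(1024, 64): each Conv2d above uses kernel_size=5 with padding=2 and stride=1, which preserves the spatial size, and each MaxPool2d(2) halves it, so 32 -> 16 -> 8 -> 4. The last convolution outputs 64 channels, so Flatten produces 64 * 4 * 4 = 1024 features per sample, which is why output.shape prints torch.Size([64, 10]).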

2. Convolution Layers

Mainly torch.nn.Conv1d/2d/3d.

Convolution is used to extract features.

How convolution is computed
import torch
import torch.nn.functional as F

input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]])
# convolution kernel
kernel = torch.tensor([[1, 2, 1],
                       [0, 1, 0],
                       [2, 1, 0]])

input = torch.reshape(input, (1, 1, 5, 5))
kernel = torch.reshape(kernel, (1, 1, 3, 3))
'''
torch.nn.functional.conv2d(input, weight, bias=None, stride=1, 
                            padding=0, dilation=1, groups=1) → Tensor
    input: the input tensor, reshaped to (minibatch, in_channels, height, width)
    weight: the convolution kernel, reshaped to (out_channels, in_channels, height, width)
    stride: step size, an int or a tuple of two ints
    padding: zero-padding added around the input, an int or a tuple of two ints
'''
print(input.shape)
print(kernel.shape)

output = F.conv2d(input, kernel, stride=1)
print(output)

output2 = F.conv2d(input, kernel, stride=2)
print(output2)

output3 = F.conv2d(input, kernel, stride=1, padding=1)
print(output3)
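
For reference, the stride=1 result can be checked by hand (hand-computed here, not copied from the source; on recent PyTorch versions you may need to create the tensors with dtype=torch.float32 for F.conv2d to accept them):

'''
output (stride=1):
tensor([[[[10, 12, 12],
          [18, 16, 16],
          [13,  9,  3]]]])
'''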
Adding a convolution layer to a network
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)

'''
torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros',  device=None, dtype=None)
Args:
    in_channels(int): number of channels of the input image, usually 3
    out_channels(int): number of output channels; it determines how many kernels are used to extract different features
    kernel_size(int/tuple): kernel size. The kernel weights are adjusted continuously during training.
'''
class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        x = self.conv1(x)
        return x

model = MyModel()

writer = SummaryWriter("logs")
step = 0
for data in dataloader:
    imgs, targets = data
    # torch.Size([64, 3, 32, 32]) -> torch.Size([64, 6, 30, 30])
    output = model(imgs)
    writer.add_images("input", imgs, step)

    # torch.Size([64, 6, 30, 30]) -> torch.Size([xxx, 3, 30, 30]); only 3-channel images can be displayed. Use -1 when the batch size is unknown.
    output = torch.reshape(output, (-1, 3, 30, 30))
    writer.add_images("output", output, step)

    step = step + 1

writer.close()

3. Pooling Layers

Mainly torch.nn.MaxPool1d/2d/3d (max pooling).

Pooling reduces the amount of feature data.

How pooling is computed

Within each window of the pooling kernel's size over the input image, take the maximum value.

'''
torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, 
                    dilation=1, return_indices=False, ceil_mode=False)
Args:
    kernel_size: size of the pooling window
    stride: defaults to kernel_size
    dilation: spacing between window elements (dilated pooling)
    ceil_mode: True -> keep pooling windows that partially go out of bounds; False -> discard them

Input/Output:(N,C,H,W)
'''
class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.maxpool = nn.MaxPool2d(kernel_size=3, ceil_mode=True)

    def forward(self, input):
        output = self.maxpool(input)
        return output

model = MyModel()
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)  # MaxPool2d needs a floating-point input
input = torch.reshape(input, (-1, 1, 5, 5))
output = model(input)
print(output)
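
With kernel_size=3 and ceil_mode=True, the 5×5 input gives a 2×2 result (hand-computed here, not copied from the source): each 3×3 window, including the partial windows kept by ceil_mode, contributes its maximum.

'''
tensor([[[[2., 3.],
          [5., 1.]]]])
'''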
Adding a pooling layer to a network
dataset = torchvision.datasets.CIFAR10("dataset", train=False, download=True,transform=torchvision.transforms.ToTensor())

dataloader = DataLoader(dataset, batch_size=64)

class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.maxpool = nn.MaxPool2d(kernel_size=3, ceil_mode=True)

    def forward(self, input):
        output = self.maxpool(input)
        return output

model = MyModel()

writer = SummaryWriter("logs")
step = 0
for data in dataloader:
    imgs, targets = data
    writer.add_images("input", imgs, step)
    output = model(imgs)
    writer.add_images("output", output, step)
    step = step + 1

writer.close()

4. Non-linear Activations

Mainly nn.ReLU and nn.Sigmoid.

Non-linear activations improve the network's ability to fit.

Computing non-linear activations

'''
torch.nn.ReLU(inplace=False/True)
torch.nn.Sigmoid()
Args:
    inplace: True -> the input is modified in place and nothing needs to be returned; False -> the input is unchanged and the result is returned as output
Input: (*), where * means any number of dimensions.
Output: (*), same shape as the input.
'''
class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.relu1 = nn.ReLU()
        self.sigmoid1 = nn.Sigmoid()

    def forward(self, input):
        output1 = self.relu1(input)
        output2 = self.sigmoid1(input)
        return output1,output2


model = MyModel()
input = torch.tensor([[1,-0.5],
                      [-1,3]])
output1,output2 = model(input)
print(output1)
print(output2)

'''
tensor([[1., 0.],
        [0., 3.]])
tensor([[0.7311, 0.3775],
        [0.2689, 0.9526]])
'''
Adding non-linear activations to a network
dataset = torchvision.datasets.CIFAR10("dataset", train=False, download=True,transform=torchvision.transforms.ToTensor())

dataloader = DataLoader(dataset, batch_size=64)

class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.relu1 = nn.ReLU()
        self.sigmoid1 = nn.Sigmoid()

    def forward(self, input):
        output1 = self.relu1(input)
        output2 = self.sigmoid1(input)
        return output1,output2


model = MyModel()

writer = SummaryWriter("logs")
step = 0
for data in dataloader:
    imgs, targets = data
    writer.add_images("input", imgs, step)
    output1, output2 = model(imgs)
    writer.add_images("ReLU", output1, step)
    writer.add_images("Sigmoid", output2, step)
    step += 1

writer.close()

5. Linear Layers (fully connected)

A linear layer applies a linear transformation to the data and thus changes its dimensionality. For example, a linear layer can reduce high-dimensional data to fewer dimensions to cut computational cost, or project low-dimensional data to more dimensions to fit the model's needs.

import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)

dataloader = DataLoader(dataset, batch_size=64, drop_last=True)

'''
torch.nn.Linear(in_features, out_features, bias=True, device=None, dtype=None)
Args:
    in_features (int) – size of each input sample
    out_features (int) – size of each output sample
'''
class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = None

    def forward(self, input):
        if self.linear1 is None:
            self.linear1 = nn.Linear(input.size(0), 10)
        output = self.linear1(input)
        return output

model = MyModel()

for data in dataloader:
    imgs, targets = data
    print(imgs.shape)  # torch.Size([64, 3, 32, 32])

    input = torch.flatten(imgs)  # flattens the whole batch into a single 1-D tensor
    print(input.shape)  # torch.Size([196608])

    output = model(input)
    print(output.shape)  # torch.Size([10])

'''
Output:
torch.Size([64, 3, 32, 32])
torch.Size([196608])
torch.Size([10])
'''
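
To keep the batch dimension instead of flattening the whole batch into one long vector, a common alternative (a sketch of my own, assuming imgs is a [64, 3, 32, 32] batch as above) is torch.flatten with start_dim=1 plus a fixed-size linear layer:

input = torch.flatten(imgs, start_dim=1)  # [64, 3, 32, 32] -> [64, 3072], one row per sample
linear = nn.Linear(3 * 32 * 32, 10)
output = linear(input)
print(output.shape)  # torch.Size([64, 10])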
    

6. Normalization Layers

Regularization prevents overfitting and acts on the cost function.

Normalization speeds up training convergence and acts on the inputs to the activation functions.

Usage notes to be added later!
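
Until that update, here is a minimal sketch of nn.BatchNorm2d (my own example, not from the tutorial): it normalizes each channel over the batch, and num_features must equal the number of input channels, i.e. C in (N, C, H, W).

import torch
from torch import nn

bn = nn.BatchNorm2d(num_features=3)  # one mean/variance pair per channel
x = torch.randn(64, 3, 32, 32)       # (N, C, H, W)
y = bn(x)
print(y.shape)                       # torch.Size([64, 3, 32, 32]), same shape, normalized per channel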

7. Loss Functions

  • Measure the gap between the actual output and the target
  • Provide the basis for updating the output, i.e. the network parameters (back-propagation)
Computing loss functions

torch.nn.L1Loss computes the mean absolute error

torch.nn.MSELoss computes the mean squared error, commonly used for regression problems

torch.nn.CrossEntropyLoss computes the cross entropy, commonly used for classification problems
$$\text{loss}(x, \text{class}) = -\log\left(\frac{\exp(x[\text{class}])}{\sum_{j} \exp(x[j])}\right) = -x[\text{class}] + \log\left(\sum_{j} \exp(x[j])\right)$$
Example:

import torch
from torch.nn import L1Loss
from torch import nn

inputs = torch.tensor([1, 2, 3], dtype=torch.float32)
targets = torch.tensor([1, 2, 5], dtype=torch.float32)

inputs = torch.reshape(inputs, (1, 1, 1, 3))
targets = torch.reshape(targets, (1, 1, 1, 3))
'''
torch.nn.L1Loss(size_average=None, reduce=None, reduction='mean')

Args:
    reduction:Specifies the reduction to apply to the output: 'none' | 'mean' | 'sum'. 'none'
              default = 'mean'
'''
loss = L1Loss()
result = loss(inputs, targets)

loss_mse = nn.MSELoss()
result_mse = loss_mse(inputs, targets)

print(result)
print(result_mse)


x = torch.tensor([0.1, 0.2, 0.3])
y = torch.tensor([1])
x = torch.reshape(x, (1, 3))
'''
torch.nn.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean', label_smoothing=0.0)

Input:(C),(N,C)
Output:(),(N)

'''
loss_cross = nn.CrossEntropyLoss()
result_cross = loss_cross(x, y)
print(result_cross)


'''
tensor(0.6667)
tensor(1.3333)
tensor(1.1019)
'''
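
The last value can be checked against the formula above: with x = [0.1, 0.2, 0.3] and class = 1, loss = -x[1] + log(e^0.1 + e^0.2 + e^0.3) ≈ -0.2 + log(3.676) ≈ 1.1019, which matches tensor(1.1019).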
Computing the loss inside a network
import torchvision
from torch import nn
from torch.nn import Sequential, Conv2d, MaxPool2d, Flatten, Linear
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)

dataloader = DataLoader(dataset, batch_size=1)

class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x


loss = nn.CrossEntropyLoss()
model = MyModel()
for data in dataloader:
    imgs, targets = data
    outputs = model(imgs)
    print(imgs)    # shape (BS, 3, 32, 32)
    print(targets) # shape (BS,), the class index of each image
    print(outputs) # shape (BS, 10), the score of each class for each image
    result_loss = loss(outputs, targets)
    result_loss.backward() # run back-propagation to compute the gradients used for the later parameter update
    print(result_loss) # tensor(loss value, grad_fn=...)

Optimizers: torch.optim

Basic usage pattern

  • Construct an optimizer: choose the optimization algorithm
  • For every batch:
    • zero the gradients
    • compute the loss
    • compute the loss gradients (backward)
    • call the optimizer's step method to update the model parameters from the gradients
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
optimizer = optim.Adam([var1, var2], lr=0.0001)
'''
Args:
	params: the model parameters
	lr: the learning rate; too large and training may not converge, too small and training is slow.
	other optimizer-specific parameters...
'''

Example:

dataset = torchvision.datasets.CIFAR10("dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)

dataloader = DataLoader(dataset, batch_size=64)

class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x


loss = nn.CrossEntropyLoss()
model = MyModel()
optim = torch.optim.SGD(model.parameters(), lr=0.01)


for epoch in range(20):
    running_loss = 0.0

    for data in dataloader:
        imgs, targets = data
        outputs = model(imgs)
        optim.zero_grad()  # zero the gradients
        result_loss = loss(outputs, targets) # compute the loss
        result_loss.backward() # compute the loss gradients
        optim.step() # update the model parameters
        running_loss += result_loss.item()

    print(running_loss)

Applying network models

Using and fine-tuning existing models

import torchvision
from torch import nn

# Model with untrained (randomly initialized) weights
vgg16_not_pretrained = torchvision.models.vgg16(weights = None)

# Model pretrained on the ImageNet dataset
vgg16_pretrained = torchvision.models.vgg16(weights = "DEFAULT")

# Fine-tune VGG16 by appending an extra linear layer
vgg16_pretrained.classifier.add_module('add_linear', nn.Linear(1000, 10))

# Fine-tune VGG16 by directly replacing an existing linear layer
vgg16_not_pretrained.classifier[6] = nn.Linear(4096, 10)

train_data = torchvision.datasets.CIFAR10('../data', train=True, transform=torchvision.transforms.ToTensor(),download=True)

Saving and loading models

import torch
import torchvision

vgg16 = torchvision.models.vgg16(weights = None)

# Saving, method 1
torch.save(vgg16,"vgg16_method1.pth") # saves the model structure + parameters
# Saving, method 2
torch.save(vgg16.state_dict(),"vgg16_method2.pth") # saves only the parameters, as a dict

# Loading, method 1
model = torch.load("vgg16_method1.pth") # restores both the structure and the parameters
# Loading, method 2
model = torchvision.models.vgg16(weights = None)
model.load_state_dict(torch.load("vgg16_method2.pth"))
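
A pitfall with loading method 1: the class that defines the model must still be importable in the loading script (torchvision provides VGG automatically, but a custom model needs its own class definition available). A minimal sketch, with a hypothetical custom model and file name:

from model import MyModel  # the class definition must be importable (hypothetical module)
model = torch.load("mymodel_method1.pth")  # hypothetical file saved with method 1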

Model training

Basic workflow
  1. Prepare the dataset
  2. Load the dataset with DataLoader
  3. Build the neural network
  4. Create an instance of the network model
  5. Set the loss function
  6. Set the optimizer
  7. Set the training parameters
  8. Outer loop over epochs
  • inner loop over train_dataloader

    • input -> output
    • compute the loss
    • zero the optimizer's gradients
    • compute the loss gradients
    • update the parameters with the optimizer
  • evaluate this epoch's model on the test set

    • inner loop over test_dataloader
  • print this epoch's loss and accuracy on the test set

  • save this epoch's model

import torchvision
from torch.utils.tensorboard import SummaryWriter
from torch import nn
from torch.utils.data import DataLoader
from model import *

# Prepare the datasets
train_data = torchvision.datasets.CIFAR10(root="dataset", train=True, transform=torchvision.transforms.ToTensor(),
                                          download=True)
test_data = torchvision.datasets.CIFAR10(root="dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                         download=True)

train_data_size = len(train_data) # dataset length
test_data_size = len(test_data)
print(f"Training set size: {train_data_size}") # f-string formatting
print(f"Test set size: {test_data_size}")

# Load the datasets
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# Create the network model
model = MyModel()

# Loss function
loss_fn = nn.CrossEntropyLoss()

# Optimizer
# learning_rate = 0.01
# 1e-2 = 1 x 10^(-2) = 1/100 = 0.01
learning_rate = 1e-2
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Training parameters
total_train_step = 0  # number of training steps so far
total_test_step = 0   # number of test evaluations so far
epoch = 10   # number of training epochs


# Add TensorBoard
writer = SummaryWriter("logs")

for i in range(epoch):
    print(f"-------第 {i+1} 轮训练开始-------")

    model.train()  # 模型进入训练状态

    for data in train_dataloader:
        imgs, targets = data
        outputs = model(imgs)
        loss = loss_fn(outputs, targets)

        # the optimizer updates the model
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step = total_train_step + 1
        if total_train_step % 100 == 0:    # print once every 100 steps
            print(f"Training step: {total_train_step}, Loss: {loss.item()}")
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # Evaluation starts
    model.eval() # switch the model to evaluation mode
    total_test_loss = 0
    total_test_accuracy = 0
    with torch.no_grad(): # no gradients are tracked, so the model parameters are not affected
        for data in test_dataloader:
            imgs, targets = data
            outputs = model(imgs)
            loss = loss_fn(outputs, targets)
            total_test_loss += loss.item()
            '''
                argmax returns the position of the highest score in outputs:
                argmax(1) looks across each row (per sample, over the classes)
                argmax(0) would look down each column (over the batch)
            '''
            accuracy = (outputs.argmax(1) == targets).sum()
            total_test_accuracy += accuracy

    print("整体测试集上的Loss: {}".format(total_test_loss))
    print("整体测试集上的Accurary: {}".format(total_test_accuracy/test_data_size))
    total_test_step = total_test_step + 1
    writer.add_scalar("total_test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_test_accuracy / test_data_size, total_test_step)

    torch.save(model, f"./models/model_{i+1}.pth")
    # torch.save(model,f"./models/model_{i+1}.pth"))
    print("模型已保存")

writer.close()


Training on a GPU
Method 1

Check whether CUDA acceleration is available: if torch.cuda.is_available():

If it is, then move:

the network model: model = model.cuda()

the loss function: loss_fn = loss_fn.cuda()

the data: imgs = imgs.cuda()
targets = targets.cuda()

Method 2

Define the training device: device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Move the network model, loss function and data to that device: model.to(device), loss_fn.to(device), imgs = imgs.to(device), targets = targets.to(device). Note that .to(device) works in place for an nn.Module but returns a new tensor for data, so tensors must be reassigned, as in the sketch below.
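
A minimal sketch of method 2 applied to the training loop (my own sketch, reusing MyModel and train_dataloader from the training section above):

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = MyModel().to(device)
loss_fn = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)

for data in train_dataloader:
    imgs, targets = data
    imgs = imgs.to(device)        # reassignment is required for tensors
    targets = targets.to(device)

    outputs = model(imgs)
    loss = loss_fn(outputs, targets)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()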

Model validation

Use a trained model for testing: feed in data and check whether the output is as expected or reaches a certain accuracy.

import torch
import torchvision
from PIL import Image
from model import *
from torch import nn

image_path = "test_imgs/dog.png"
image = Image.open(image_path)
image = image.convert('RGB') # convert the 4-channel RGBA png to 3-channel RGB
transform = torchvision.transforms.Compose([torchvision.transforms.Resize((32, 32)),
                                            torchvision.transforms.ToTensor()])

image = transform(image)  # torch.Size([3, 32, 32])

model = torch.load("models/model_best.pth", map_location=torch.device('cpu')) # map the saved weights onto the CPU
image = torch.reshape(image, (1, 3, 32, 32))   # torch.Size([1, 3, 32, 32])

# Run inference
model.eval()
with torch.no_grad():
    output = model(image)

print(output) # the score of each class
print(output.argmax(1)) # the class with the highest score

All done, congrats on making it to the end~
