pytorch学习笔记3 - 神经网络基础

nn.module

参考文档 https://pytorch.org/docs/stable/generated/torch.nn.Module.html?highlight=nn%20module#torch.nn.Module

module是所有神经网络最基本的类

import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    """Two stacked 5x5 convolutions (1->20->20 channels), each followed by ReLU."""

    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        # Apply each conv layer then its ReLU activation, in order.
        for conv in (self.conv1, self.conv2):
            x = F.relu(conv(x))
        return x

F.relu常用的激活函数
nn.conv2d卷积函数

写个简易的神经网络

import torch
from torch import nn
import torch.nn.functional as F


class simple_nn(nn.Module):
    """Minimal nn.Module whose forward pass is just a ReLU."""

    def __init__(self):
        super().__init__()

    def forward(self, input):
        # ReLU: pass positives through, clamp negatives to zero.
        return F.relu(input)


# Drive the ReLU module with one positive and one negative scalar.
example_nn = simple_nn()
for value in (0.1, -0.1):
    print(example_nn(torch.tensor(value)))

'''
tensor(0.1000)
tensor(0.)
'''

卷积

文档https://pytorch.org/docs/stable/nn.html

看看conv2d的定义

In the simplest case, the output value of the layer with input size $(N, C_{\text{in}}, H, W)$ and output $(N, C_{\text{out}}, H_{\text{out}}, W_{\text{out}})$ can be precisely described as:

$$\operatorname{out}(N_i, C_{\text{out}_j}) = \operatorname{bias}(C_{\text{out}_j}) + \sum_{k=0}^{C_{\text{in}}-1} \operatorname{weight}(C_{\text{out}_j}, k) \star \operatorname{input}(N_i, k)$$

也就是对于每个输出通道 $C_{\text{out}_j}$, 输出等于权重与输入在各输入通道 $k$ 上的互相关 (cross-correlation, 记作 $\star$) 的累加, 再加上该通道的偏置 bias

还是比较复杂, 尝试说的简单一点, 卷积在神经网络中的处理方式, 简单理解就是矩阵的对应位置乘法和乘积的加法

在这里插入图片描述

在这里插入图片描述

输入是5x5矩阵, 3x3矩阵是卷积核, 卷积操作就是将卷积核盖到输入矩阵上按位置相乘, 再累加得到的数值作为输出矩阵相应位置的结果, 卷积核向右向下移动依次执行就能得到一个新的3x3矩阵, 这个矩阵就是卷积操作的结果
另外stride参数表示卷积核每一步移动的单位数, padding参数表示给输入矩阵边界添加一圈0得到的扩展矩阵(主要是用来让卷积核可以覆盖的范围增加)

比如padding=2时, 输入矩阵变为

在这里插入图片描述
图片比较粗糙, 直接看官方文档的动图就秒懂了

import torch
import torch.nn.functional as F


# Functional conv2d demo: cross-correlate a 5x5 input with a 3x3 cross-shaped
# kernel under three different stride/padding settings.
input = torch.tensor([[ 1,  2,  3,  4,  5],
                      [10,  9,  8,  7,  6],
                      [11, 12, 13, 14, 15],
                      [20, 19, 18, 17, 16],
                      [21, 22, 23, 24, 25]]).reshape(1, 1, 5, 5)

kernel = torch.tensor([[1, 0, 1],
                       [0, 1, 0],
                       [1, 0, 1]]).reshape(1, 1, 3, 3)

print(input.shape)
print(kernel.shape)

# stride=1: the kernel slides one element at a time -> 3x3 output
output = F.conv2d(input, kernel, stride=1)
print(output)

# stride=2: the kernel jumps two elements at a time -> 2x2 output
output2 = F.conv2d(input, kernel, stride=2)
print(output2)

# padding=1: zero-pad the border first, so the output stays 5x5
output3 = F.conv2d(input, kernel, stride=1, padding=1)
print(output3)

'''
torch.Size([1, 1, 5, 5])
torch.Size([1, 1, 3, 3])
tensor([[[[37, 40, 43],
          [68, 65, 62],
          [87, 90, 93]]]])
tensor([[[[37, 43],
          [87, 93]]]])
tensor([[[[10, 20, 19, 18, 12],
          [24, 37, 40, 43, 24],
          [39, 68, 65, 62, 39],
          [54, 87, 90, 93, 54],
          [40, 60, 59, 58, 42]]]])
'''

输入矩阵和卷积核的tensor维度问题, 输入的tensor需要有batch_size, channel, height, width四个维度
所以需要reshape, 第一个是batch大小, 第二个是通道数, 后两个是矩阵的高和宽(行和列)

input = torch.reshape(input, (1, 1, 5, 5))
kernel = torch.reshape(kernel, (1, 1, 3, 3))

卷积层

用一个dataset来进行演示, 给神经网络定义卷积层conv

import torch
import torchvision
from torch.nn import Conv2d
from torch.utils.data import DataLoader
from torch import nn
from torch.utils.tensorboard import SummaryWriter

# CIFAR10 test split, converted to tensors, served in batches of 64.
to_tensor = torchvision.transforms.ToTensor()
dataset = torchvision.datasets.CIFAR10(root="./dataset", train=False, transform=to_tensor, download=True)
dataloader = DataLoader(dataset, batch_size=64)


class simple_convnn(nn.Module):
    """Single conv layer: 3 input channels -> 6 output channels, 3x3 kernel, no padding."""

    def __init__(self):
        super(simple_convnn, self).__init__()
        self.conv = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        return self.conv(x)


# Run every CIFAR10 batch through the conv layer and log before/after images.
example_convnn = simple_convnn()
writer = SummaryWriter("logs")

for step, (imgs, targets) in enumerate(dataloader):
    feats = example_convnn(imgs)
    # 6-channel feature maps can't be rendered as RGB; fold the extra
    # channels into the batch dimension so add_images sees 3-channel images.
    feats_rgb = torch.reshape(feats, (-1, 3, 30, 30))
    writer.add_images("inputs pictures", imgs, step)
    writer.add_images("outputs pictures", feats_rgb, step)

writer.close()

效果
在这里插入图片描述

在这里插入图片描述

池化

池化的作用主要是给数据降维, 用池化核处理输入的tensor, 保留特征降低运算量
最大池化MaxPool2d的文档https://pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html#torch.nn.MaxPool2d

class torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, 
dilation=1, return_indices=False, ceil_mode=False)
import torch
import torchvision
from torch.nn import MaxPool2d
from torch.utils.data import DataLoader
from torch import nn
from torch.utils.tensorboard import SummaryWriter

# Same CIFAR10 test split as before, batches of 64 tensor images.
to_tensor = torchvision.transforms.ToTensor()
dataset = torchvision.datasets.CIFAR10(root="./dataset", train=False, transform=to_tensor, download=True)
dataloader = DataLoader(dataset, batch_size=64)


class pooling_nn(nn.Module):
    """3x3 max pooling; ceil_mode=False drops partial windows at the border."""

    def __init__(self):
        super(pooling_nn, self).__init__()
        self.maxpool = MaxPool2d(kernel_size=3, ceil_mode=False)

    def forward(self, input):
        return self.maxpool(input)


# Pool every CIFAR10 batch and log the before/after images to TensorBoard.
example_pooling = pooling_nn()
writer = SummaryWriter("logs")

for step, (imgs, targets) in enumerate(dataloader):
    pooled = example_pooling(imgs)
    writer.add_images("input", imgs, step)
    writer.add_images("output after pooling", pooled, step)

writer.close()

效果, 有点像打马赛克的感觉, 保留主要特征, 省略不重要的数据
在这里插入图片描述

sequential

类似transforms的compose, sequential是用来拼接神经网络层的类
文档

import torch
from torch import nn


class simple_sequential(nn.Module):
    """CIFAR10-sized CNN: three conv+pool stages, then flatten and two linear layers."""

    def __init__(self):
        super(simple_sequential, self).__init__()
        # Shapes annotated for a 3x32x32 input.
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, 1, 2),   # -> 32x32x32 (padding keeps spatial size)
            nn.MaxPool2d(2),             # -> 32x16x16
            nn.Conv2d(32, 32, 5, 1, 2),  # -> 32x16x16
            nn.MaxPool2d(2),             # -> 32x8x8
            nn.Conv2d(32, 64, 5, 1, 2),  # -> 64x8x8
            nn.MaxPool2d(2),             # -> 64x4x4
            nn.Flatten(),                # -> 1024
            nn.Linear(64 * 4 * 4, 64),
            nn.Linear(64, 10),           # 10 class logits
        )

    def forward(self, x):
        return self.model(x)


if __name__ == '__main__':
    # Smoke test: a fake batch of 64 CIFAR10-sized images -> 10 logits each.
    example_sequential = simple_sequential()
    fake_batch = torch.ones((64, 3, 32, 32))
    print(example_sequential(fake_batch).shape)

'''
torch.Size([64, 10])
'''

神经网络结构:
卷积 → 池化 → 卷积 → 池化 → 卷积 → 池化 → 平展 → 全连接层 → 全连接层

损失函数

ML里的重点, 一切都要归结到损失函数上…

常用的损失函数

  • L1loss 平均绝对误差
  • MSEloss 均方误差
  • CrossEntropyLoss 交叉熵loss

误差loss

# Loss-function demo: L1 (summed), MSE (mean), and cross-entropy.
inputs = torch.tensor([1, 2, 3], dtype=torch.float32)
targets = torch.tensor([1, 2, 5], dtype=torch.float32)

inputs = torch.reshape(inputs, (1, 1, 1, 3))
targets = torch.reshape(targets, (1, 1, 1, 3))

# Fix: the original used bare `L1Loss` with no import in scope (NameError);
# qualify it through `nn`, consistent with nn.MSELoss below.
# reduction='sum' -> |1-1| + |2-2| + |3-5| = 2.0
loss = nn.L1Loss(reduction='sum')
result = loss(inputs, targets)

# Default reduction is 'mean' -> (0 + 0 + 4) / 3 = 1.3333
loss_mse = nn.MSELoss()
result_mse = loss_mse(inputs, targets)

print(result)
print(result_mse)


# Cross-entropy over 3 class logits, target class index 1:
# -log(softmax(x)[1]) ≈ 1.1019
x = torch.tensor([0.1, 0.2, 0.3])
y = torch.tensor([1])
x = torch.reshape(x, (1, 3))  # CrossEntropyLoss expects (batch, classes)
loss_cross = nn.CrossEntropyLoss()
result_cross = loss_cross(x, y)
print(result_cross)


'''
tensor(2.)
tensor(1.3333)
tensor(1.1019)
'''

交叉熵loss

import torchvision
from torch import nn
from torch.nn import Sequential, Conv2d, MaxPool2d, Linear, Flatten
from torch.utils.data import DataLoader

# CIFAR10 test split as tensors, one sample per batch.
to_tensor = torchvision.transforms.ToTensor()
dataset = torchvision.datasets.CIFAR10("./dataset", train=False,
                                       transform=to_tensor, download=True)
dataloader = DataLoader(dataset, batch_size=1)


class simple_nn(nn.Module):
    """CIFAR10 CNN: three conv+pool stages, flatten, two linear layers -> 10 logits."""

    def __init__(self):
        super(simple_nn, self).__init__()
        # Shapes annotated for a 3x32x32 input.
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),   # -> 32x32x32
            MaxPool2d(2),                  # -> 32x16x16
            Conv2d(32, 32, 5, padding=2),  # -> 32x16x16
            MaxPool2d(2),                  # -> 32x8x8
            Conv2d(32, 64, 5, padding=2),  # -> 64x8x8
            MaxPool2d(2),                  # -> 64x4x4
            Flatten(),                     # -> 1024
            Linear(1024, 64),
            Linear(64, 10),
        )

    def forward(self, x):
        return self.model1(x)


# Print the per-sample cross-entropy over the test set (no training yet).
loss = nn.CrossEntropyLoss()
example_nn = simple_nn()
for imgs, targets in dataloader:
    outputs = example_nn(imgs)
    print(loss(outputs, targets))

优化器

优化器通过反向传播给神经网络参数进行调优
优化器有很多, 可以看看文档

这里演示用优化器进行10轮训练, loss总量的变化

import torch
import torchvision
from torch import nn
from torch.nn import Conv2d, Flatten, Linear, MaxPool2d, Sequential
from torch.utils.data import DataLoader

# CIFAR10 test split as tensors, one sample per batch.
to_tensor = torchvision.transforms.ToTensor()
dataset = torchvision.datasets.CIFAR10("dataset", train=False,
                                       transform=to_tensor, download=True)
dataloader = DataLoader(dataset, batch_size=1)

class nn_optim(nn.Module):
    """CIFAR10 CNN used for the optimizer demo (same topology as the loss demo)."""

    def __init__(self):
        super(nn_optim, self).__init__()
        # Shapes annotated for a 3x32x32 input.
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),   # -> 32x32x32
            MaxPool2d(2),                  # -> 32x16x16
            Conv2d(32, 32, 5, padding=2),  # -> 32x16x16
            MaxPool2d(2),                  # -> 32x8x8
            Conv2d(32, 64, 5, padding=2),  # -> 64x8x8
            MaxPool2d(2),                  # -> 64x4x4
            Flatten(),                     # -> 1024
            Linear(1024, 64),
            Linear(64, 10),
        )

    def forward(self, x):
        return self.model1(x)


# Train for 10 epochs with plain SGD and print the summed loss per epoch.
loss = nn.CrossEntropyLoss()
example_optim = nn_optim()
optim = torch.optim.SGD(example_optim.parameters(), lr=0.01)
for epoch in range(10):
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data
        outputs = example_optim(imgs)
        result_loss = loss(outputs, targets)
        optim.zero_grad()       # clear gradients left over from the previous step
        result_loss.backward()  # backpropagate
        optim.step()            # apply the parameter update
        # Fix: accumulate the Python float, not the tensor. Summing the
        # graph-attached tensor keeps every iteration's autograd graph alive
        # for the whole epoch (that's why the original output shows
        # grad_fn=<AddBackward0>), wasting memory.
        running_loss += result_loss.item()
    print(running_loss)

'''
tensor(18722.8223, grad_fn=<AddBackward0>)
tensor(16202.5547, grad_fn=<AddBackward0>)
tensor(15438.5029, grad_fn=<AddBackward0>)
tensor(16092.9414, grad_fn=<AddBackward0>)
tensor(17612.1680, grad_fn=<AddBackward0>)
tensor(20097.0996, grad_fn=<AddBackward0>)
tensor(22193.0488, grad_fn=<AddBackward0>)
tensor(23876.7910, grad_fn=<AddBackward0>)
tensor(24815.5781, grad_fn=<AddBackward0>)
tensor(25785.2207, grad_fn=<AddBackward0>)
'''

(可以看到第3轮训练时loss总量最小, 之后loss反而逐渐增大, 说明训练开始发散, 可能需要调小学习率)

网络模型修改

类似迁移学习的技术, 直接应用开源的网络模型, 做一定的修改, 然后适配特定问题, 得到一个新的模型
比如应用vgg16去做CIFAR10数据集的训练, vgg16输出是1000 features, 而CIFAR10是10分类数据集, 所以需要对已有模型进行修改

import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# Build vgg16 twice: without and with ImageNet pretrained weights.
vgg16_false = torchvision.models.vgg16(pretrained=False)
vgg16_true = torchvision.models.vgg16(pretrained=True)
dataset = torchvision.datasets.CIFAR10('dataset', train=True,
                                       transform=torchvision.transforms.ToTensor(), download=True)
dataloader = DataLoader(dataset, batch_size=64)

# Extend: append a linear layer mapping the 1000 ImageNet logits down to 30.
print(vgg16_true)
vgg16_true.classifier.add_module('add_linear', nn.Linear(1000, 30))
print(vgg16_true)

# Modify: replace the final classifier layer so it outputs 10 classes.
print(vgg16_false)
vgg16_false.classifier[6] = nn.Linear(4096, 10)
print(vgg16_false)


writer = SummaryWriter("logs")
for step, (imgs, targets) in enumerate(dataloader):
    output = vgg16_true(imgs)
    # The 64x30 logits are reshaped into fake 3-channel images purely so
    # TensorBoard's add_images can render them.
    output = torch.reshape(output, (-1, 3, 10, 8))
    writer.add_images("inputs pictures", imgs, step)
    writer.add_images("outputs pictures", output, step)

writer.close()

添加网络层

print(vgg16_true)
vgg16_true.classifier.add_module('add_linear', nn.Linear(1000, 30))
print(vgg16_true)

修改网络层

print(vgg16_false)
vgg16_false.classifier[6] = nn.Linear(4096, 10)
print(vgg16_false)

模型保存和读取

保存

vgg16 = torchvision.models.vgg16(pretrained=False)
# Save option 1: the whole model (architecture + parameters) in one file.
torch.save(vgg16, "vgg16_method1.pth")

# Save option 2: parameters only via state_dict (officially recommended).
torch.save(vgg16.state_dict(), "vgg16_method2.pth")

读取

# Load option 1: unpickle the whole model object directly.
model = torch.load("vgg16_method1.pth")

# Load option 2: build the architecture first, then load the saved weights.
vgg16 = torchvision.models.vgg16(pretrained=False)
vgg16.load_state_dict(torch.load("vgg16_method2.pth"))

总结

简单过了一遍搭建神经网络的流程, 比较浅显, 后边还要多练习和多读代码提高ML开发的功力.
下篇blog总结一下模型训练相关的流程和技术.

我的blog只是学了什么记录什么, 当然不能跟专业的AI大佬相提并论, 而且主要也是给我自己看的, 如果碰巧能帮助到各位路过的大佬也是荣幸之至.
另外blog中有什么错误还请大佬们斧正斧正.

  • 1
    点赞
  • 13
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值