nn.module
参考文档 https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module
module是所有神经网络最基本的类
import torch.nn as nn
import torch.nn.functional as F
class Model(nn.Module):
    """Two-layer convolutional network: conv -> ReLU -> conv -> ReLU."""

    def __init__(self):
        super().__init__()
        # 1 input channel -> 20 feature maps, then 20 -> 20, both with 5x5 kernels.
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        hidden = F.relu(self.conv1(x))
        return F.relu(self.conv2(hidden))
F.relu
常用的激活函数
nn.Conv2d
卷积函数
写个简易的神经网络
import torch
from torch import nn
import torch.nn.functional as F
class simple_nn(nn.Module):
    """Minimal network whose forward pass is just a ReLU activation."""

    def __init__(self):
        super().__init__()

    def forward(self, input):
        # ReLU passes positive values through and clamps negatives to zero.
        return F.relu(input)


example_nn = simple_nn()
for value in (0.1, -0.1):
    print(example_nn(torch.tensor(value)))
'''
tensor(0.1000)
tensor(0.)
'''
卷积
文档https://pytorch.org/docs/stable/nn.html
看看conv2d的定义
In the simplest case, the output value of the layer with input size $(N, C_{\text{in}}, H, W)$ and output $(N, C_{\text{out}}, H_{\text{out}}, W_{\text{out}})$ can be precisely described as:
$$\operatorname{out}\left(N_{i}, C_{\text{out}_{j}}\right)=\operatorname{bias}\left(C_{\text{out}_{j}}\right)+\sum_{k=0}^{C_{\text{in}}-1} \operatorname{weight}\left(C_{\text{out}_{j}}, k\right) \star \operatorname{input}\left(N_{i}, k\right)$$
也就是对于每个输出通道 $C_{\text{out}_j}$, 输出是权重与各输入通道的互相关(cross-correlation)的累加, 再加上该通道的偏置 bias.
还是比较复杂, 尝试说的简单一点, 卷积在神经网络中的处理方式, 简单理解就是矩阵的对应位置乘法和乘积的加法
输入是5x5矩阵, 3x3矩阵是卷积核, 卷积操作就是将卷积核盖到输入矩阵上按位置乘, 再累加得到的数值作为输出矩阵的相应位置的结果, 卷积核向右向下移动依次执行就能得到一个新的3x3的矩阵, 然后这个矩阵就是卷积操作的结果
另外stride
参数表示卷积核每一步移动的单位数, padding
参数表示给输入矩阵边界添加一圈0得到的扩展矩阵(主要是用来让卷积核可以覆盖的范围增加)
比如padding=2
时, 输入矩阵变为
图片比较粗糙, 直接看官方文档的动图就秒懂了
import torch
import torch.nn.functional as F
# A 5x5 input and a 3x3 cross-shaped kernel for the cross-correlation demo.
# F.conv2d expects 4-D tensors: (batch, channels, height, width).
input = torch.reshape(
    torch.tensor([[ 1,  2,  3,  4,  5],
                  [10,  9,  8,  7,  6],
                  [11, 12, 13, 14, 15],
                  [20, 19, 18, 17, 16],
                  [21, 22, 23, 24, 25]]),
    (1, 1, 5, 5))
kernel = torch.reshape(
    torch.tensor([[1, 0, 1],
                  [0, 1, 0],
                  [1, 0, 1]]),
    (1, 1, 3, 3))
print(input.shape)
print(kernel.shape)

# Slide the kernel one position at a time -> 3x3 output.
print(F.conv2d(input, kernel, stride=1))
# Stride 2 skips every other position -> 2x2 output.
print(F.conv2d(input, kernel, stride=2))
# padding=1 surrounds the input with a ring of zeros -> 5x5 output.
print(F.conv2d(input, kernel, stride=1, padding=1))
'''
torch.Size([1, 1, 5, 5])
torch.Size([1, 1, 3, 3])
tensor([[[[37, 40, 43],
[68, 65, 62],
[87, 90, 93]]]])
tensor([[[[37, 43],
[87, 93]]]])
tensor([[[[10, 20, 19, 18, 12],
[24, 37, 40, 43, 24],
[39, 68, 65, 62, 39],
[54, 87, 90, 93, 54],
[40, 60, 59, 58, 42]]]])
'''
输入矩阵和卷积核的tensor维度问题, 输入的tensor需要有batch_size, channel, height, width
四个维度
所以需要reshape
, 第一个是batch大小, 第二个是通道数, 后两个是矩阵的高和宽(行和列)
input = torch.reshape(input, (1, 1, 5, 5))
kernel = torch.reshape(kernel, (1, 1, 3, 3))
卷积层
用一个dataset来进行演示, 给神经网络定义卷积层conv
import torch
import torchvision
from torch.nn import Conv2d
from torch.utils.data import DataLoader
from torch import nn
from torch.utils.tensorboard import SummaryWriter
# CIFAR-10 test split converted to tensors; served in batches of 64 RGB 32x32 images.
dataset = torchvision.datasets.CIFAR10(root="./dataset", train=False, transform=torchvision.transforms.ToTensor(), download=True)
dataloader = DataLoader(dataset, batch_size=64)
class simple_convnn(nn.Module):
    """Single convolutional layer: 3 input channels (RGB) -> 6 output channels."""

    def __init__(self):
        super().__init__()
        # 3x3 kernel, stride 1, no padding: a 32x32 input shrinks to 30x30.
        self.conv = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        return self.conv(x)
# Run every batch through the conv layer and log inputs/outputs to TensorBoard.
example_convnn = simple_convnn()
writer = SummaryWriter("logs")
step = 0
for data in dataloader:
    imgs, targets = data
    output = example_convnn(imgs)
    shape = output.shape
    # print(shape)
    # add_images expects 3 channels; fold the 6-channel output back to
    # 3 channels by letting the batch dimension absorb the extra channels.
    output_reshape = torch.reshape(output, (-1, 3, 30, 30))
    writer.add_images("inputs pictures", imgs, step)
    writer.add_images("outputs pictures", output_reshape, step)
    step += 1
writer.close()
效果
池化
池化的作用主要是给数据降维, 用池化核处理输入的tensor, 保留特征降低运算量
最大池化MaxPool2d
的文档https://pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html#torch.nn.MaxPool2d
class torch.nn.MaxPool2d(kernel_size, stride=None, padding=0,
dilation=1, return_indices=False, ceil_mode=False)
import torch
import torchvision
from torch.nn import MaxPool2d
from torch.utils.data import DataLoader
from torch import nn
from torch.utils.tensorboard import SummaryWriter
# Same CIFAR-10 test split, batched by 64, for the pooling demo below.
dataset = torchvision.datasets.CIFAR10(root="./dataset", train=False, transform=torchvision.transforms.ToTensor(), download=True)
dataloader = DataLoader(dataset, batch_size=64)
class pooling_nn(nn.Module):
    """Max-pools its input with a 3x3 window (stride defaults to the kernel size)."""

    def __init__(self):
        super().__init__()
        # ceil_mode=False drops border positions that don't fit a full window.
        self.maxpool = MaxPool2d(kernel_size=3, ceil_mode=False)

    def forward(self, input):
        return self.maxpool(input)
# Pool every batch and log before/after images to TensorBoard.
example_pooling = pooling_nn()
writer = SummaryWriter("logs")
step = 0
for data in dataloader:
    imgs, targets = data
    # Pooling keeps the channel count (3), so the output can be logged directly.
    output = example_pooling(imgs)
    writer.add_images("input", imgs, step)
    writer.add_images("output after pooling", output, step)
    step += 1
writer.close()
效果, 有点像打马赛克的感觉, 保留主要特征, 省略不重要的数据
sequential
类似transforms的compose, sequential是用来拼接神经网络层的类
文档
import torch
from torch import nn
class simple_sequential(nn.Module):
    """CIFAR-10-sized classifier: three conv/pool stages, then two linear layers."""

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, 1, 2),   # 32x32 -> 32x32 (padding 2 keeps the size)
            nn.MaxPool2d(2),             # -> 16x16
            nn.Conv2d(32, 32, 5, 1, 2),
            nn.MaxPool2d(2),             # -> 8x8
            nn.Conv2d(32, 64, 5, 1, 2),
            nn.MaxPool2d(2),             # -> 4x4
            nn.Flatten(),                # 64 channels * 4 * 4 = 1024 features
            nn.Linear(64 * 4 * 4, 64),
            nn.Linear(64, 10),           # 10 class scores
        )

    def forward(self, x):
        return self.model(x)


if __name__ == '__main__':
    net = simple_sequential()
    batch = torch.ones((64, 3, 32, 32))
    print(net(batch).shape)
'''
torch.Size([64, 10])
'''
神经网络结构:
卷积 → 池化 → 卷积 → 池化 → 卷积 → 池化 → 平展 → 全连接层 → 全连接层
损失函数
ML里的重点, 一切都要归结到损失函数上…
常用的损失函数
- L1loss 平均绝对误差
- MSEloss 均方误差
- CrossEntropyLoss 交叉熵loss
误差loss
# L1 (here: sum of absolute differences) and MSE on the same pair of tensors.
inputs = torch.tensor([1, 2, 3], dtype=torch.float32).reshape(1, 1, 1, 3)
targets = torch.tensor([1, 2, 5], dtype=torch.float32).reshape(1, 1, 1, 3)

result = L1Loss(reduction='sum')(inputs, targets)   # |1-1| + |2-2| + |3-5| = 2
result_mse = nn.MSELoss()(inputs, targets)          # (0 + 0 + 4) / 3 = 1.3333
print(result)
print(result_mse)

# Cross-entropy: x holds raw scores for 3 classes, y the true class index.
x = torch.tensor([0.1, 0.2, 0.3]).reshape(1, 3)
y = torch.tensor([1])
result_cross = nn.CrossEntropyLoss()(x, y)
print(result_cross)
'''
tensor(2.)
tensor(1.3333)
tensor(1.1019)
'''
交叉熵loss
import torchvision
from torch import nn
from torch.nn import Sequential, Conv2d, MaxPool2d, Linear, Flatten
from torch.utils.data import DataLoader
# One image per batch so each printed loss below belongs to a single sample.
dataset = torchvision.datasets.CIFAR10("./dataset", train=False,
                                       transform=torchvision.transforms.ToTensor(), download=True)
dataloader = DataLoader(dataset, batch_size=1)
class simple_nn(nn.Module):
    """Conv/pool x3, Flatten, then two Linear layers; outputs 10 class scores."""

    def __init__(self):
        super().__init__()
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),   # 64 channels * 4 * 4 spatial positions = 1024
            Linear(64, 10),
        )

    def forward(self, x):
        return self.model1(x)
loss = nn.CrossEntropyLoss()
example_nn = simple_nn()
for data in dataloader:
    imgs, targets = data
    outputs = example_nn(imgs)
    # Per-sample cross-entropy between the 10 class scores and the true label.
    result_loss = loss(outputs, targets)
    print(result_loss)
优化器
优化器通过反向传播给神经网络参数进行调优
优化器有很多, 可以看看文档
这里演示用优化器进行10轮训练, loss总量的变化
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d, Flatten, Linear, MaxPool2d, Sequential
from torch.utils.data import DataLoader
# CIFAR-10 test split, one image per batch (so each epoch is one SGD step per image).
dataset = torchvision.datasets.CIFAR10("dataset", train=False,
                                       transform=torchvision.transforms.ToTensor(), download=True)
dataloader = DataLoader(dataset, batch_size=1)
class nn_optim(nn.Module):
    """Same CIFAR-10 classifier as above, used to demonstrate the optimizer."""

    def __init__(self):
        super().__init__()
        layers = [
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10),
        ]
        self.model1 = Sequential(*layers)

    def forward(self, x):
        return self.model1(x)
# Train for 10 epochs with SGD and print the accumulated loss per epoch.
loss = nn.CrossEntropyLoss()
example_optim = nn_optim()
# SGD updates every parameter of the network with learning rate 0.01.
optim = torch.optim.SGD(example_optim.parameters(), lr=0.01)
for epoch in range(10):
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data
        outputs = example_optim(imgs)
        result_loss = loss(outputs, targets)
        optim.zero_grad()          # clear gradients left over from the previous step
        result_loss.backward()     # backprop: compute fresh gradients
        optim.step()               # apply the SGD update
        # BUG FIX: accumulate the plain Python float via .item(). The original
        # `running_loss = running_loss + result_loss` summed the loss *tensors*,
        # keeping every batch's autograd graph alive (visible as the
        # grad_fn=<AddBackward0> in the printed output) and leaking memory.
        running_loss += result_loss.item()
    print(running_loss)
'''
tensor(18722.8223, grad_fn=<AddBackward0>)
tensor(16202.5547, grad_fn=<AddBackward0>)
tensor(15438.5029, grad_fn=<AddBackward0>)
tensor(16092.9414, grad_fn=<AddBackward0>)
tensor(17612.1680, grad_fn=<AddBackward0>)
tensor(20097.0996, grad_fn=<AddBackward0>)
tensor(22193.0488, grad_fn=<AddBackward0>)
tensor(23876.7910, grad_fn=<AddBackward0>)
tensor(24815.5781, grad_fn=<AddBackward0>)
tensor(25785.2207, grad_fn=<AddBackward0>)
'''
(可以看到第3轮训练时loss总量最小, 后面loss不降反升, 训练效果反而变差了)
网络模型修改
类似迁移学习的技术, 直接应用开源的网络模型, 做一定的修改, 然后适配特定问题, 得到一个新的模型
比如应用vgg16去做CIFAR10数据集的训练, vgg16输出是1000 features, 而CIFAR10是10分类数据集, 所以需要对已有模型进行修改
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
# pretrained=False gives random weights; pretrained=True downloads ImageNet weights.
vgg16_false = torchvision.models.vgg16(pretrained=False)
vgg16_true = torchvision.models.vgg16(pretrained=True)
dataset = torchvision.datasets.CIFAR10('dataset', train=True,
                                       transform=torchvision.transforms.ToTensor(), download=True)
dataloader = DataLoader(dataset, batch_size=64)
print(vgg16_true)
# Append an extra Linear layer to the classifier: 1000 ImageNet logits -> 30 outputs.
# NOTE(review): the surrounding text says CIFAR-10 has 10 classes, so 30 looks odd;
# it appears chosen only so the (-1, 3, 10, 8) reshape below works — confirm intent.
vgg16_true.classifier.add_module('add_linear', nn.Linear(1000, 30))
print(vgg16_true)
print(vgg16_false)
# Alternative: replace the classifier's last layer in place (4096 -> 10 classes).
vgg16_false.classifier[6] = nn.Linear(4096, 10)
print(vgg16_false)
writer = SummaryWriter("logs")
step = 0
for data in dataloader:
    imgs, targets = data
    output = vgg16_true(imgs)
    # View the 30 outputs per image as fake 3-channel 10x8 "images" for TensorBoard.
    # NOTE(review): 3*10*8 = 240, so every 8 images' outputs merge into one fake
    # image and the batch dimension shrinks 8x — presumably a visualization hack.
    output = torch.reshape(output, (-1,3,10,8))
    writer.add_images("inputs pictures", imgs, step)
    writer.add_images("outputs pictures", output, step)
    step += 1
writer.close()
添加网络层
# Adding a network layer: add_module appends a named child to the classifier Sequential.
print(vgg16_true)
vgg16_true.classifier.add_module('add_linear', nn.Linear(1000, 30))
print(vgg16_true)
修改网络层
# Modifying a network layer: index into the classifier and assign a replacement layer.
print(vgg16_false)
vgg16_false.classifier[6] = nn.Linear(4096, 10)
print(vgg16_false)
模型保存和读取
保存
vgg16 = torchvision.models.vgg16(pretrained=False)
# Save method 1: the whole model object (structure + parameters).
torch.save(vgg16, "vgg16_method1.pth")
# Save method 2: parameters only, via state_dict (officially recommended).
torch.save(vgg16.state_dict(), "vgg16_method2.pth")
读取
# Method 1: load the full saved model object directly.
model = torch.load("vgg16_method1.pth")
# Method 2: rebuild the architecture (no pretrained weights), then load the saved parameters.
vgg16 = torchvision.models.vgg16(pretrained=False)
vgg16.load_state_dict(torch.load("vgg16_method2.pth"))
总结
简单过了一遍搭建神经网络的流程, 比较浅显, 后边还要多练习和多读代码提高ML开发的功力.
下篇blog总结一下模型训练相关的流程和技术.
我的blog只是学了什么记录什么, 当然不能跟专业的AI大佬相提并论, 而且主要也是给我自己看的, 如果碰巧能帮助到各位路过的大佬也是荣幸之至.
另外blog中有什么错误还请大佬们斧正斧正.