3.1 Basic Configuration
3.1.1 Common Packages
import os
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim  # conventional alias; aliasing it "optimizer" would shadow the optimizer variable used later
3.1.2 GPU Setup
# Option 1: use os.environ; GPU usage then needs no further device setup in the code
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
# Option 2: use "device"; afterwards, call .to(device) on every variable that should run on the GPU
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
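With Option 2, the model and the data are moved explicitly. A minimal sketch (net and data are hypothetical names):
net = nn.Linear(10, 1).to(device)     # move the model's parameters to the chosen device
data = torch.randn(4, 10).to(device)  # move input tensors the same way
output = net(data)                    # the computation now runs on that device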
3.2 Data Loading
PyTorch reads data through the Dataset + DataLoader combination: a Dataset defines the data format and its transformations, while a DataLoader loads the data batch by batch in an iterative fashion.
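A custom Dataset only needs to implement __getitem__ and __len__; a minimal sketch, with in-memory tensors as placeholder data:
class MyDataset(Dataset):
    def __init__(self, features, labels):
        # store the raw data; transforms could also be applied here
        self.features = features
        self.labels = labels
    def __getitem__(self, idx):
        # return a single (sample, label) pair; DataLoader assembles these into batches
        return self.features[idx], self.labels[idx]
    def __len__(self):
        return len(self.labels)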
3.2.1 Reading the Data (Download)
Taking the CIFAR-10 dataset as an example, a Dataset can be built from images on disk with ImageFolder (train_path and val_path point to directories with one subfolder per class; data_transform is defined in the next subsection):
import torch
from torchvision import datasets
train_data = datasets.ImageFolder(train_path, transform=data_transform)
val_data = datasets.ImageFolder(val_path, transform=data_transform)
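Note that ImageFolder reads images already organized on disk. To actually download CIFAR-10, torchvision's built-in dataset class can be used instead; a sketch, with './data' as an arbitrary download location:
train_data = datasets.CIFAR10(root='./data', train=True, download=True, transform=data_transform)
val_data = datasets.CIFAR10(root='./data', train=False, download=True, transform=data_transform)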
3.2.2 Transforming the Data (Load)
import torchvision
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=[0.5], std=[0.5])
])
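This transform object is what gets passed as data_transform above. Applied to a single-channel PIL image (the file name here is hypothetical), it yields a normalized tensor:
from PIL import Image
img = Image.open('example.png').convert('L')  # hypothetical grayscale image
x = transform(img)  # float tensor of shape [1, H, W], scaled to [-1, 1] by Normalize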
3.2.3 Packing the Data (DataLoader)
import torch
# trainData and testData are Dataset objects such as those built above
trainDataLoader = torch.utils.data.DataLoader(dataset=trainData, batch_size=BATCH_SIZE, shuffle=True)
testDataLoader = torch.utils.data.DataLoader(dataset=testData, batch_size=BATCH_SIZE)
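Once wrapped in a DataLoader, batches are consumed with a plain for loop; a minimal sketch (shapes depend on the dataset):
for images, labels in trainDataLoader:
    print(images.shape, labels.shape)  # e.g. torch.Size([BATCH_SIZE, C, H, W])
    break  # just inspect the first batch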
3.4 Model Construction
3.4.1 Constructing a Neural Network
import torch
from torch import nn
class MLP(nn.Module):
    # Declare the layers that carry model parameters; here, two fully connected layers
    def __init__(self, **kwargs):
        # Call the constructor of the parent class nn.Module to perform the necessary
        # initialization; this also allows extra arguments when constructing an instance
        super(MLP, self).__init__(**kwargs)
        self.hidden = nn.Linear(784, 256)
        self.act = nn.ReLU()
        self.output = nn.Linear(256, 10)

    # Define the forward computation: how to produce the model output from the input x
    def forward(self, x):
        o = self.act(self.hidden(x))
        return self.output(o)
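Instantiating the network and calling it on a batch of inputs invokes forward automatically; there is no need to call forward by hand:
net = MLP()
X = torch.rand(2, 784)  # a batch of two flattened 28x28 images
print(net(X).shape)     # torch.Size([2, 10])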
3.4.2 Common Layers in Neural Networks
The following example includes convolutional, pooling, and fully connected layers.
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.model = nn.Sequential(
            # The size of the picture is 28*28*1
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # The size of the picture is 14*14*16
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # The size of the picture is 7*7*32
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Flatten(),  # flatten to one dimension
            nn.Linear(in_features=7*7*64, out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=10),
            # Note: drop this Softmax if training with nn.CrossEntropyLoss,
            # which expects raw logits and applies log-softmax internally
            nn.Softmax(dim=1),
        )

    def forward(self, input):
        output = self.model(input)
        return output
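A quick shape check confirms the per-layer size comments (the input is a batch of 1x28x28 images):
net = Net()
out = net(torch.rand(1, 1, 28, 28))
print(out.shape)  # torch.Size([1, 10]); each row sums to 1 because of the Softmax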
3.5 Model Initialization
In deep learning, the initial values of the weights matter a great deal: good initial weights speed up convergence and help the model reach higher accuracy. To ease training and shorten convergence time, the model should be initialized sensibly. PyTorch provides the common initialization methods in torch.nn.init.
3.5.1 Using torch.nn.init
We usually apply torch.nn.init based on the actual model, typically using isinstance to check which type each module belongs to.
import torch
import torch.nn as nn
conv = nn.Conv2d(1,3,3)
linear = nn.Linear(10,1)
isinstance(conv, nn.Conv2d)    # True
isinstance(linear, nn.Conv2d)  # False
Different layers are initialized with different methods.
# Kaiming initialization for the convolutional layer
torch.nn.init.kaiming_normal_(conv.weight.data)
conv.weight.data
# Constant initialization for the linear layer
torch.nn.init.constant_(linear.weight.data, 0.3)
linear.weight.data
3.5.2 Wrapping Initialization into a Function
def initialize_weights(self):
    for m in self.modules():
        # Check whether the module is a Conv2d
        if isinstance(m, nn.Conv2d):
            torch.nn.init.xavier_normal_(m.weight.data)
            # Check whether it has a bias
            if m.bias is not None:
                torch.nn.init.constant_(m.bias.data, 0.3)
        elif isinstance(m, nn.Linear):
            # normal_ with mean 0.1 (std keeps its default of 1.0)
            torch.nn.init.normal_(m.weight.data, 0.1)
            if m.bias is not None:
                torch.nn.init.zeros_(m.bias.data)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()  # zero_ is the in-place method (zeros_ does not exist)
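Since initialize_weights takes self, it is meant to live on the model class, but it can also be called with a model instance directly. A minimal sketch using the Net class from above:
model = Net()
initialize_weights(model)  # walks model.modules() and initializes each layer in place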
3.6 Loss Functions
There are many kinds of loss functions; among the most commonly used are:
Mean squared error:
torch.nn.MSELoss(size_average=None, reduce=None, reduction='mean')
Cross-entropy loss:
torch.nn.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean')
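In both signatures, size_average and reduce are deprecated; reduction ('mean', 'sum', or 'none') is the current way to control aggregation. A minimal usage sketch, with random tensors standing in for real model outputs and targets:
criterion = nn.CrossEntropyLoss()
logits = torch.randn(4, 10)          # raw scores (no Softmax) for 4 samples, 10 classes
target = torch.randint(0, 10, (4,))  # integer class labels
loss = criterion(logits, target)     # scalar tensor
mse = nn.MSELoss()(torch.randn(4), torch.randn(4))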
3.7 Training and Evaluation
A complete training loop for image classification looks like this:
def train(epoch):
    model.train()
    train_loss = 0
    for data, label in train_loader:
        data, label = data.cuda(), label.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, label)  # prediction first, then target
        loss.backward()
        optimizer.step()
        train_loss += loss.item()*data.size(0)
    train_loss = train_loss/len(train_loader.dataset)
    print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch, train_loss))
A complete validation loop for image classification looks like this:
def val(epoch):
    model.eval()
    val_loss = 0
    running_accu = 0  # count of correct predictions
    with torch.no_grad():
        for data, label in val_loader:
            data, label = data.cuda(), label.cuda()
            output = model(data)
            preds = torch.argmax(output, 1)
            loss = criterion(output, label)
            val_loss += loss.item()*data.size(0)
            running_accu += torch.sum(preds == label.data).item()
    val_loss = val_loss/len(val_loader.dataset)
    val_acc = running_accu/len(val_loader.dataset)
    print('Epoch: {} \tValidation Loss: {:.6f} \tValidation Acc: {:.6f}'.format(epoch, val_loss, val_acc))
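Both functions assume that model, criterion, optimizer, and the two loaders are already defined; a typical driver then simply alternates them once per epoch (epochs is a hypothetical variable):
for epoch in range(1, epochs + 1):
    train(epoch)
    val(epoch)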
3.8 PyTorch Optimizers
3.8.1 Optimizers Provided by PyTorch
PyTorch conveniently provides an optimizer library, torch.optim, which includes the following optimizers:
- torch.optim.ASGD
- torch.optim.Adadelta
- torch.optim.Adagrad
- torch.optim.Adam
- torch.optim.AdamW
- torch.optim.Adamax
- torch.optim.LBFGS
- torch.optim.RMSprop
- torch.optim.Rprop
- torch.optim.SGD
- torch.optim.SparseAdam
3.8.2 Example
Taking Adam as an example:
optimizer = torch.optim.Adam(net.parameters())  # instantiate the optimizer before training starts
for epoch in range(EPOCH):  # inside the training loop
    ...
    optimizer.zero_grad()   # zero the gradients
    loss = ...              # compute the loss
    loss.backward()         # backpropagate
    optimizer.step()        # update the parameters