A summary of some tips for training models with PyTorch.
Specify which GPUs to use
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '0,1'
Select the device
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('The device is:', device)
model.to(device)
Load a dataset; the example below comes from pytorch-tutorial
import torchvision
import torchvision.transforms as transforms

# Image preprocessing modules
transform = transforms.Compose([
    transforms.Pad(4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor()])

# CIFAR-10 dataset
train_dataset = torchvision.datasets.CIFAR10(root='../../data/',
                                             train=True,
                                             transform=transform,
                                             download=True)

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=100,
                                           shuffle=True)
To use a custom dataset, subclass the Dataset class from torch.utils.data.dataset
from torch.utils.data.dataset import Dataset

class MyCustomDataset(Dataset):
    def __init__(self, ...):
        # initialization (e.g. collect file paths and labels)
    def __getitem__(self, index):
        # load and preprocess one sample
        return (img, label)
    def __len__(self):
        return count
It consists of three main parts:
- __init__() initialization logic goes here
- __len__() returns the total number of samples
- __getitem__() returns one sample and its label, and can be invoked explicitly like this:
dataset = MyCustomDataset(...)
img, label = dataset[99]  # equivalent to dataset.__getitem__(99)
For more details, see the separate summary of methods for reading custom datasets.
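As a concrete illustration, here is a minimal sketch of a custom dataset that reads images from a list of file paths; the ImageListDataset name, the path/label lists, and the transform argument are hypothetical placeholders, not part of the original example:

from PIL import Image
from torch.utils.data import Dataset

class ImageListDataset(Dataset):
    # Hypothetical example: image_paths and labels are plain Python lists
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __getitem__(self, index):
        # Load one image and apply the (optional) preprocessing pipeline
        img = Image.open(self.image_paths[index]).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return (img, self.labels[index])

    def __len__(self):
        return len(self.image_paths)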
Save and load model parameters
# Save the entire network
torch.save(model, PATH)

# Save only the network's parameters: faster and uses less disk space
torch.save(model.state_dict(), PATH)

#--------------------------------------------------
# The corresponding ways to load for the two saving methods above:
model_load = torch.load(PATH)

# If only the parameters were saved, build the network structure first
model_load = MyNet()
model_load.load_state_dict(torch.load(PATH))
To save more information, such as the optimizer state and model accuracy:
torch.save({'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_acc': train_accuracy,
            'optimizer': optimizer.state_dict()}, PATH)

# Correspondingly, load the saved checkpoint as a dictionary
model_dict = torch.load('./checkpoint.pth')
model.load_state_dict(model_dict['state_dict'])
optimizer.load_state_dict(model_dict['optimizer'])
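Since the checkpoint above also records the epoch counter, training can resume where it left off; a minimal sketch (start_epoch is just an illustrative name):

# Resume training from the epoch stored in the checkpoint
start_epoch = model_dict['epoch']
for epoch in range(start_epoch, num_epochs):
    ...  # same training loop as in the training section below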
Fine-tune a model: change the number of output classes of the final fully connected layer to fit your own dataset
fc_features = model.fc.in_features
# Suppose your own dataset has 20 classes
model.fc = nn.Linear(fc_features, 20)
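Putting the pieces together, a minimal fine-tuning sketch assuming a pretrained ResNet-18 from torchvision (the model choice and the 20-class figure are illustrative assumptions):

import torch.nn as nn
import torchvision

# Load a pretrained backbone and swap its classification head
model = torchvision.models.resnet18(pretrained=True)
fc_features = model.fc.in_features
model.fc = nn.Linear(fc_features, 20)  # assumed 20 classes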
Freeze model parameters
# Freeze the parameters of layers 1-8; if the model is built from blocks,
# each block counts as one layer
ct = 1
for child in model.children():
    if ct <= 8:
        for param in child.parameters():
            param.requires_grad = False
    ct += 1
print('freeze model')

# Correspondingly, the optimizer should only receive the trainable parameters
optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                            lr=learning_rate, momentum=0.9)
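To verify the freeze took effect, one quick check is to count trainable versus total parameters; a small sketch:

# Count trainable vs. total parameters after freezing
num_total = sum(p.numel() for p in model.parameters())
num_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('trainable parameters: {} / {}'.format(num_trainable, num_total))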
Train the model
# For updating learning rate
def update_lr(optimizer, lr):
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

# Train the model
total_step = len(train_loader)
curr_lr = learning_rate
for epoch in range(num_epochs):
    model.train()
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Decay learning rate
    if (epoch+1) % 20 == 0:
        curr_lr /= 3
        update_lr(optimizer, curr_lr)
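The manual update_lr above works fine; as an alternative, PyTorch's built-in schedulers express the same decay policy. A sketch of the equivalent setup with torch.optim.lr_scheduler.StepLR (a substitute for the manual approach, not what the loop above uses):

from torch.optim.lr_scheduler import StepLR

# Divide the learning rate by 3 every 20 epochs
scheduler = StepLR(optimizer, step_size=20, gamma=1/3)
for epoch in range(num_epochs):
    ...  # run the inner training loop as above
    scheduler.step()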
Test the model. Call model.eval() before testing; if you want to keep training after testing, remember to call model.train() again before training.
# Test the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))
Train on multiple GPUs by wrapping the model in nn.DataParallel; from the Zhihu post "pytorch如何使用多GPU"
import torch.nn as nn

if device == torch.device('cuda'):
    print('set parallel')
    model_parallel = nn.DataParallel(model, device_ids=[0, 3])
    torch.backends.cudnn.benchmark = True

# Correspondingly, when saving, take the underlying model out of model_parallel
torch.save(model_parallel.module.state_dict(), PATH)
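Note that if a checkpoint is instead saved from the wrapper itself (without .module), every key in the state_dict carries a 'module.' prefix; a sketch for stripping the prefix when loading into an unwrapped model:

# Load a state_dict that was saved directly from nn.DataParallel
state_dict = torch.load(PATH)
state_dict = {k.replace('module.', '', 1): v for k, v in state_dict.items()}
model.load_state_dict(state_dict)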