下面是一个使用 PyTorch 对卷积神经网络进行超参数优化(基于交叉验证)的示例。
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
from skopt import gp_minimize
from skopt.space import Real, Categorical
from skopt.utils import use_named_args
# 定义模型类
class CNN(nn.Module):
    """Small two-conv CNN producing 10-class logits for 3x32x32 images."""

    def __init__(self):
        super(CNN, self).__init__()
        # Two 3x3 convolutions; padding=1 preserves spatial size before pooling.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # Two 2x2 poolings turn a 32x32 input into 8x8 feature maps (32 channels).
        self.fc1 = nn.Linear(32 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return raw (unnormalized) class logits for a batch of images."""
        out = self.pool(torch.relu(self.conv1(x)))
        out = self.pool(torch.relu(self.conv2(out)))
        out = out.view(out.size(0), -1)  # flatten each sample's feature maps
        out = torch.relu(self.fc1(out))
        return self.fc2(out)
# Load the dataset.
# NOTE(review): `trainset` is not defined anywhere in this file — presumably a
# torchvision dataset (e.g. CIFAR-10, given the CNN's 3x32x32 input assumption);
# it must be created before this line runs. Also note this top-level
# `train_loader` is shadowed inside `evaluate_model` and never used afterwards.
train_loader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
# Number of folds for k-fold cross-validation.
k = 5
# Training-and-validation helper.
def train(model, train_loader, val_loader, optimizer, criterion, epochs):
    """Train `model` for `epochs` epochs, then return validation accuracy.

    Args:
        model: the nn.Module to optimize.
        train_loader: iterable of (inputs, labels) batches for training.
        val_loader: iterable of (inputs, labels) batches for validation.
        optimizer: optimizer already bound to `model.parameters()`.
        criterion: loss function, e.g. nn.CrossEntropyLoss().
        epochs: number of passes over `train_loader`.

    Returns:
        Validation accuracy as a percentage in [0, 100].
    """
    for epoch in range(epochs):
        model.train()  # enable training-mode behavior (dropout, batchnorm, ...)
        running_loss = 0.0
        num_batches = 0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1
        # Guard against an empty loader: the original divided by `i + 1`,
        # which raised NameError when the loop body never ran.
        avg_loss = running_loss / num_batches if num_batches else float('nan')
        print('Epoch [%d]/[%d] Loss: %.3f' % (epoch + 1, epochs, avg_loss))
    # Validate the model.
    model.eval()  # disable training-mode behavior for evaluation
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images)
            # torch.max over dim 1 gives (values, argmax-class-indices);
            # `.data` access removed — no_grad already detaches.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print('Validation Accuracy: %d %%' % (accuracy))
    return accuracy
# Hyperparameter search space for scikit-optimize (skopt).
# Each dimension's `name` must match the keyword expected by the
# @use_named_args-decorated objective.
space = [
    # Learning rate sampled log-uniformly over [1e-5, 1e-1].
    Real(1e-5, 1e-1, name='learning_rate', prior='log-uniform'),
    Categorical([32, 64, 128], name='hidden_size'),
    Categorical([2, 3, 4], name='num_layers')
]
# Hyperparameter-evaluation objective for gp_minimize.
@use_named_args(space)
def evaluate_model(**params):
    """k-fold cross-validated objective for gp_minimize.

    Trains a fresh CNN on each fold with the sampled hyperparameters and
    returns the NEGATED mean validation accuracy (gp_minimize minimizes).
    """
    # Fixed batch size for both fold loaders. The original referenced an
    # undefined `batch_size` name, raising NameError on first call.
    batch_size = 64
    # Initialize k-fold splitting over the full dataset.
    kf = KFold(n_splits=k, shuffle=True)
    accuracies = []
    # Cross-validation: train and score once per fold.
    for fold, (train_indices, val_indices) in enumerate(kf.split(trainset)):
        # Split the dataset into this fold's train/validation subsets.
        train_sampler = torch.utils.data.sampler.SubsetRandomSampler(train_indices)
        val_sampler = torch.utils.data.sampler.SubsetRandomSampler(val_indices)
        train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, sampler=train_sampler)
        val_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, sampler=val_sampler)
        # Fresh model per fold so folds do not share weights.
        net = CNN()
        lr = params['learning_rate']
        # NOTE(review): hidden_size / num_layers are sampled by the optimizer
        # but never applied — CNN() accepts no such arguments, so these two
        # search-space dimensions are dead. Either wire them into the model
        # constructor or drop them from `space`.
        hidden_size = params['hidden_size']
        num_layers = params['num_layers']
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9)
        accuracy = train(net, train_loader, val_loader, optimizer, criterion, epochs=10)
        accuracies.append(accuracy)
    # Average accuracy across folds; negate because gp_minimize minimizes.
    average_accuracy = sum(accuracies) / len(accuracies)
    print('Average Validation Accuracy: %.3f' % (average_accuracy))
    return -average_accuracy
# Run Bayesian hyperparameter optimization; n_calls is the total number of
# objective evaluations (each one runs full k-fold training — expensive).
result = gp_minimize(evaluate_model, space, n_calls=50, verbose=True)
# evaluate_model returns -accuracy (gp_minimize minimizes), so negate back.
print('Best Validation Accuracy: %.3f' % (-result.fun))
print('Best Parameters: %s' % (result.x))
这个示例代码中,我们首先定义了我们的 CNN 模型,并加载了数据集。
然后我们使用 scikit-optimize 库来定义超参数空间和优化函数,并运行 gp_minimize 函数来执行优化(其中 n_calls 参数为最大评估次数)。在 evaluate_model 函数中,我们首先初始化 k-fold,将数据拆分为训练和验证集,并计算交叉验证的平均准确率。
针对每个超参数集合,我们将其应用于训练和验证的模型中,并输出每次循环的平均准确率(average accuracy)。最后,我们输出最佳的平均准确率以及相应的最佳超参数集合。