Our company has recently been building a standardized training pipeline for neural networks, and I want the hyperparameters to be tuned automatically, so I surveyed the hyperparameter-tuning tools in common use today:
1. Hyperopt
Hyperopt is a powerful Python library for hyperparameter optimization, developed by James Bergstra. It tunes parameters with a form of Bayesian optimization, letting you find the best parameters for a given model, and it scales to searches over models with hundreds of parameters.
Any of three search algorithms can be used (the toy sketch after the workflow list below shows how each one is selected):
- random search
- simulated annealing
- TPE (Tree-structured Parzen Estimator)
The general workflow, illustrated by the minimal sketch right after this list:
- define the objective function (for a neural network this is the training loop, which returns a loss);
- define the parameter space;
- run the optimization.
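Before the real example, here is a minimal sketch of those three steps on a toy quadratic objective (everything here is illustrative and separate from the ANN example below); swapping the algo argument switches between the three algorithms:
from hyperopt import fmin, hp, tpe, rand, anneal, Trials

# Step 1: the objective function -- fmin minimizes its return value
def toy_objective(x):
    return (x - 3) ** 2

# Step 2: the parameter space -- a single continuous parameter here
toy_space = hp.uniform('x', -10, 10)

# Step 3: run the optimization; the algo argument selects the search strategy
toy_trials = Trials()
best = fmin(fn=toy_objective,
            space=toy_space,
            algo=tpe.suggest,        # TPE
            # algo=rand.suggest,     # random search
            # algo=anneal.suggest,   # simulated annealing
            max_evals=100,
            trials=toy_trials)
print(best)  # something close to {'x': 3.0}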
Now a tuning example with an ANN:
1. Define the neural network and the model trainer
# Neural network model definition: a fully connected net with two hidden ReLU layers
class ANNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(ANNModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        return self.fc3(x)
# Model trainer: wraps the training loop and validation, tracking the best validation loss
class ModelTrainer:
    def __init__(self, model, config):
        self.model = model.to(config.DEVICE)
        self.config = config
        self.criterion = nn.MSELoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=config.LEARNING_RATE)
        self.best_loss = float('inf')

    def get_data_loader(self, data, batch_size, shuffle=True):
        return DataLoader(data, batch_size=batch_size, shuffle=shuffle)

    def evaluate(self, data_loader):
        self.model.eval()
        total_loss = 0.0
        with torch.no_grad():
            for X, y in data_loader:
                X, y = X.to(self.config.DEVICE), y.to(self.config.DEVICE)
                outputs = self.model(X)
                loss = self.criterion(outputs, y)
                total_loss += loss.item()
        return total_loss / len(data_loader)

    def train(self, train_loader, val_loader):
        train_losses, val_losses = [], []
        for epoch in range(self.config.NUM_EPOCHS):
            self.model.train()
            epoch_loss = 0.0
            for X, y in train_loader:
                X, y = X.to(self.config.DEVICE), y.to(self.config.DEVICE)
                self.optimizer.zero_grad()
                outputs = self.model(X)
                loss = self.criterion(outputs, y)
                loss.backward()
                self.optimizer.step()
                epoch_loss += loss.item()
            avg_train_loss = epoch_loss / len(train_loader)
            train_losses.append(avg_train_loss)
            val_loss = self.evaluate(val_loader)
            val_losses.append(val_loss)
            if val_loss < self.best_loss:
                self.best_loss = val_loss
            print(f'Epoch [{epoch+1}/{self.config.NUM_EPOCHS}] Train Loss: {avg_train_loss:.4f} Val Loss: {val_loss:.4f}')
        return np.sqrt(self.best_loss)  # return the best validation RMSE
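Before wiring these classes into Hyperopt, a quick standalone smoke test helps; the snippet below runs them on random synthetic data (the shapes and the SmokeConfig values are placeholder assumptions, not from the real dataset):
# Hypothetical smoke test on synthetic data -- shapes and settings are illustrative only
class SmokeConfig:
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    BATCH_SIZE = 12
    NUM_EPOCHS = 5
    LEARNING_RATE = 0.01

X = torch.randn(200, 10)  # 200 samples, 10 features
y = torch.randn(200, 3)   # 3 regression targets
data = TensorDataset(X, y)

model = ANNModel(input_size=10, hidden_size=32, output_size=3)
trainer = ModelTrainer(model, SmokeConfig())
loader = trainer.get_data_loader(data, SmokeConfig.BATCH_SIZE)
rmse = trainer.train(loader, loader)  # reusing one loader as "validation" is fine for a smoke test
print(f"Smoke-test RMSE: {rmse:.4f}")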
2. Define the objective function
# Bayesian-optimization objective function
def hyperopt_objective(params, x_train, y_train):
    class Config:
        DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        BATCH_SIZE = params['batch_size']
        NUM_EPOCHS = params['num_epochs']
        LEARNING_RATE = params['learning_rate']
        HIDDEN_SIZE = params['hidden_size']
    config = Config()
    # fix the random seed
    set_seed(42)
    # prepare the data
    X_train, X_val, y_train_split, y_val = train_test_split(
        x_train.values, y_train.values, test_size=0.2, random_state=42
    )
    train_data = TensorDataset(torch.FloatTensor(X_train), torch.FloatTensor(y_train_split))
    val_data = TensorDataset(torch.FloatTensor(X_val), torch.FloatTensor(y_val))
    model = ANNModel(input_size=X_train.shape[1],
                     hidden_size=config.HIDDEN_SIZE,
                     output_size=y_train_split.shape[1])
    trainer = ModelTrainer(model, config)
    train_loader = trainer.get_data_loader(train_data, config.BATCH_SIZE)
    val_loader = trainer.get_data_loader(val_data, config.BATCH_SIZE, shuffle=False)
    rmse = trainer.train(train_loader, val_loader)
    print(f"LR: {params['learning_rate']:.4f}, Best RMSE: {rmse:.4f}")
    return rmse  # fmin minimizes this value
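Returning a bare float works because fmin treats it as the loss, but Hyperopt also accepts a dict, which lets you attach extra bookkeeping to each trial. A sketch of the equivalent dict form (STATUS_OK is part of Hyperopt's public API; hyperopt_objective_dict is just an illustrative wrapper):
from hyperopt import STATUS_OK

def hyperopt_objective_dict(params, x_train, y_train):
    rmse = hyperopt_objective(params, x_train, y_train)
    return {'loss': rmse,         # the value fmin minimizes
            'status': STATUS_OK,  # marks the trial as finished successfully
            'params': params}     # extra keys end up in trials.results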
3. Define the parameter space
# candidate hyperparameter values
lr_list = [0.001, 0.01, 0.1]
num_epochs_list = [100, 200, 500, 1000]
batch_size_list = [6, 12, 24, 36]
hidden_size_list = [16, 32, 64, 128]
# parameter-space definition
param_space = {
    'learning_rate': hp.choice('lr', lr_list),
    'num_epochs': hp.choice('num_epochs', num_epochs_list),
    'batch_size': hp.choice('batch_size', batch_size_list),
    'hidden_size': hp.choice('hidden_size', hidden_size_list)
}
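hp.choice treats every candidate as an unordered category, which is fine for short lists, but a continuous parameter such as the learning rate can also be searched directly with Hyperopt's standard continuous expressions; a sketch (the bounds below are illustrative):
# alternative: continuous distributions instead of fixed candidate lists
param_space_continuous = {
    'learning_rate': hp.loguniform('lr', -7, -2),             # exp(-7)≈0.0009 .. exp(-2)≈0.135
    'hidden_size': hp.quniform('hidden_size', 16, 128, 16),   # 16, 32, ..., 128
    'num_epochs': hp.choice('num_epochs', num_epochs_list),
    'batch_size': hp.choice('batch_size', batch_size_list)
}
# note: hp.quniform yields floats, so cast with int(params['hidden_size']) in the objective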
4. Run the optimization
def param_hyperopt(x_train, y_train, max_evals=100):
    trials = Trials()
    best = fmin(fn=lambda params: hyperopt_objective(params, x_train, y_train),
                space=param_space,
                algo=tpe.suggest,
                max_evals=max_evals,
                trials=trials,
                early_stop_fn=no_progress_loss(20),  # stop after 20 evals without improvement
                verbose=True)
    print("Best parameters:")
    print({'learning_rate': lr_list[best['lr']]})
    print({'num_epochs': num_epochs_list[best['num_epochs']]})
    print({'batch_size': batch_size_list[best['batch_size']]})
    print({'hidden_size': hidden_size_list[best['hidden_size']]})
    return best, trials
Note that with hp.choice, the best dict returned by fmin holds list indices rather than the values themselves, which is why the prints above index back into the candidate lists. trials = Trials() records every intermediate trial along with the final result; trials.best_trial retrieves the best one.
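Two convenient ways to decode the results: hyperopt.space_eval maps the index dict returned by fmin back to the actual values, and the Trials object exposes the full search history:
from hyperopt import space_eval

best, trials = param_hyperopt(x_train, y_train, max_evals=100)
print(space_eval(param_space, best))  # e.g. {'batch_size': 12, 'hidden_size': 64, ...}
print(trials.best_trial['result'])    # loss and status of the best trial
print(trials.losses()[:5])            # losses of the first five trials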
Finally, the complete code:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from hyperopt import hp, fmin, tpe, Trials
from hyperopt.early_stop import no_progress_loss
import numpy as np
import pandas as pd
import random
# fix all random seeds for reproducibility
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
# candidate hyperparameter values
lr_list = [0.001, 0.01, 0.1]
num_epochs_list = [100, 200, 500, 1000]
batch_size_list = [6, 12, 24, 36]
hidden_size_list = [16, 32, 64, 128]
# Neural network model definition: a fully connected net with two hidden ReLU layers
class ANNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(ANNModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        return self.fc3(x)
# Model trainer: wraps the training loop and validation, tracking the best validation loss
class ModelTrainer:
    def __init__(self, model, config):
        self.model = model.to(config.DEVICE)
        self.config = config
        self.criterion = nn.MSELoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=config.LEARNING_RATE)
        self.best_loss = float('inf')

    def get_data_loader(self, data, batch_size, shuffle=True):
        return DataLoader(data, batch_size=batch_size, shuffle=shuffle)

    def evaluate(self, data_loader):
        self.model.eval()
        total_loss = 0.0
        with torch.no_grad():
            for X, y in data_loader:
                X, y = X.to(self.config.DEVICE), y.to(self.config.DEVICE)
                outputs = self.model(X)
                loss = self.criterion(outputs, y)
                total_loss += loss.item()
        return total_loss / len(data_loader)

    def train(self, train_loader, val_loader):
        train_losses, val_losses = [], []
        for epoch in range(self.config.NUM_EPOCHS):
            self.model.train()
            epoch_loss = 0.0
            for X, y in train_loader:
                X, y = X.to(self.config.DEVICE), y.to(self.config.DEVICE)
                self.optimizer.zero_grad()
                outputs = self.model(X)
                loss = self.criterion(outputs, y)
                loss.backward()
                self.optimizer.step()
                epoch_loss += loss.item()
            avg_train_loss = epoch_loss / len(train_loader)
            train_losses.append(avg_train_loss)
            val_loss = self.evaluate(val_loader)
            val_losses.append(val_loss)
            if val_loss < self.best_loss:
                self.best_loss = val_loss
            print(f'Epoch [{epoch+1}/{self.config.NUM_EPOCHS}] Train Loss: {avg_train_loss:.4f} Val Loss: {val_loss:.4f}')
        return np.sqrt(self.best_loss)  # return the best validation RMSE
# Bayesian-optimization objective function
def hyperopt_objective(params, x_train, y_train):
    class Config:
        DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        BATCH_SIZE = params['batch_size']
        NUM_EPOCHS = params['num_epochs']
        LEARNING_RATE = params['learning_rate']
        HIDDEN_SIZE = params['hidden_size']
    config = Config()
    # fix the random seed
    set_seed(42)
    # prepare the data
    X_train, X_val, y_train_split, y_val = train_test_split(
        x_train.values, y_train.values, test_size=0.2, random_state=42
    )
    train_data = TensorDataset(torch.FloatTensor(X_train), torch.FloatTensor(y_train_split))
    val_data = TensorDataset(torch.FloatTensor(X_val), torch.FloatTensor(y_val))
    model = ANNModel(input_size=X_train.shape[1],
                     hidden_size=config.HIDDEN_SIZE,
                     output_size=y_train_split.shape[1])
    trainer = ModelTrainer(model, config)
    train_loader = trainer.get_data_loader(train_data, config.BATCH_SIZE)
    val_loader = trainer.get_data_loader(val_data, config.BATCH_SIZE, shuffle=False)
    rmse = trainer.train(train_loader, val_loader)
    print(f"LR: {params['learning_rate']:.4f}, Best RMSE: {rmse:.4f}")
    return rmse  # fmin minimizes this value
# parameter-space definition
param_space = {
    'learning_rate': hp.choice('lr', lr_list),
    'num_epochs': hp.choice('num_epochs', num_epochs_list),
    'batch_size': hp.choice('batch_size', batch_size_list),
    'hidden_size': hp.choice('hidden_size', hidden_size_list)
}
def param_hyperopt(x_train, y_train, max_evals=100):
    trials = Trials()
    best = fmin(fn=lambda params: hyperopt_objective(params, x_train, y_train),
                space=param_space,
                algo=tpe.suggest,
                max_evals=max_evals,
                trials=trials,
                early_stop_fn=no_progress_loss(20),  # stop after 20 evals without improvement
                verbose=True)
    print("Best parameters:")
    print({'learning_rate': lr_list[best['lr']]})
    print({'num_epochs': num_epochs_list[best['num_epochs']]})
    print({'batch_size': batch_size_list[best['batch_size']]})
    print({'hidden_size': hidden_size_list[best['hidden_size']]})
    return best, trials
if __name__ == '__main__':
    # fix the random seed
    set_seed(42)
    # load the data
    df = pd.read_csv('dataset/data_M.csv', skiprows=3)
    X = df.iloc[:, :-3]   # all columns except the last three are features
    y = df.iloc[:, -3:]   # the last three columns are regression targets
    # train/test split
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    # run the Bayesian optimization
    params_best, trials = param_hyperopt(x_train=x_train, y_train=y_train, max_evals=100)
    best_trial = trials.best_trial
    print("Best Trial ID:", best_trial['tid'])
    print("Best Parameters:", best_trial['misc']['vals'])
    print("Best Loss:", best_trial['result']['loss'])
One problem, though: TPE's sampling is itself stochastic, so each run of the search can land on different hyperparameters, which makes the results hard to reproduce. If anyone knows a tuning tool that gives stable results, please share it in the comments.
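(For what it's worth, fmin does accept an rstate argument that pins the sampler's own seed, which makes the search itself repeatable; recent Hyperopt versions expect a NumPy Generator, while older ones took a RandomState, so check your version:)
# pin Hyperopt's own randomness so repeated searches produce the same trials
best = fmin(fn=lambda params: hyperopt_objective(params, x_train, y_train),
            space=param_space,
            algo=tpe.suggest,
            max_evals=100,
            trials=Trials(),
            rstate=np.random.default_rng(42))  # np.random.RandomState(42) on older versions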
If you need the dataset, send me a private message.