本篇记录自己之前用过的调优方法,一种基于for循环,一种基于Hyperopt。
Hyperopt提供了一个优化接口,这个接口接受一个评估函数和参数空间,能计算出参数空间内的一个点的损失函数值。用户还要
指定空间内参数的分布情况。
Hyperopt四个重要的因素:指定需要最小化的函数,搜索的空间,采样的数据集(trials database)(可选),搜索的算法(可选)。(具体可以百度,下面给出lightgbm调参实例)
# Tuning approach 1: exhaustive grid search over a small hand-picked grid.
import lightgbm as lgb
import numpy as np   # BUG FIX: used by the hyperopt search space below but was never imported
import pandas as pd  # BUG FIX: used to summarise cv results below but was never imported

print('设置基本参数')
# Base parameters shared by every grid-search trial; the tuned keys
# (num_leaves / max_depth / learning_rate) are overwritten in the loop.
params1 = {
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': 'binary_logloss',
}

# Best cross-validated AUC seen so far; start at -inf so the first trial wins.
# NOTE(review): despite the name, this variable tracks the *maximum* score —
# kept for compatibility with the comparison in the grid-search loop below.
min_merror = float('-Inf')
best_params = {}

# NOTE(review): `train` / `train_y` (feature matrix and binary labels) are
# assumed to be defined earlier in the full script — confirm against caller.
lgb_train = lgb.Dataset(train, train_y, free_raw_data=False)  # training data
print("调参1:提高准确率")
for num_leaves in range(2,10,1): #对num_leaves遍历
for max_depth in range(1,5,1): #对max_depth遍历
for learning_rate in [0.01,0.02,0.025,0.03,0.035,0.04,0.05]: #对learning_rate遍历
params1['num_leaves'] = num_leaves
params1['max_depth'] = max_depth
params1['learning_rate'] =learning_rate
cv_results = lgb.cv(
params1,
lgb_train,
seed=2018,
nfold=10,
metrics='auc',
num_boost_round=10000,
early_stopping_rounds=100,
verbose_eval=True
)
mean_merror = pd.Series(cv_results['auc-mean']).max()
boost_rounds = pd.Series(cv_results['auc-mean']).argmax()
if mean_merror >min_merror:
min_merror = mean_merror
best_params['num_leaves'] = num_leaves
best_params['max_depth'] = max_depth
best_params['learning_rate'] = learning_rate
params1['num_leaves'] = best_params['num_leaves']
params1['max_depth'] = best_params['max_depth']
params1['learning_rate'] = best_params['learning_rate']
# Tuning approach 2: Bayesian-style hyperparameter search with Hyperopt (TPE).
from hyperopt import STATUS_OK
from hyperopt import hp, fmin, rand, tpe, space_eval
from functools import partial
N_FOLDS = 10  # number of cross-validation folds used by the objective below
def objective(params, n_folds=N_FOLDS):
    """Evaluate one hyperparameter sample and return a Hyperopt result dict.

    Runs ``n_folds``-fold LightGBM cross-validation on the module-level
    ``lgb_train`` dataset. Hyperopt minimises the returned ``'loss'``, so the
    best cross-validated AUC (higher is better) is converted to ``1 - AUC``.
    """
    # NOTE(review): relies on the module-level `lgb_train` Dataset.
    auc_history = lgb.cv(params, lgb_train, nfold=n_folds, num_boost_round=10000,
                         early_stopping_rounds=100, metrics='auc', seed=2018,
                         verbose_eval=True)['auc-mean']
    best_auc = max(auc_history)
    # STATUS_OK tells Hyperopt the trial completed successfully.
    return {'loss': 1 - best_auc, 'params': params, 'status': STATUS_OK}
# Search space for Hyperopt.
# NOTE: for hp.choice dimensions, fmin() returns the *index* of the chosen
# option, which is why space_eval() is applied to `best` further below.
space = {
    # Fixed (non-searched) model settings.
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': 'auc',
    # Discrete structural parameters.
    'max_depth': hp.choice('max_depth', range(1, 8)),
    'num_leaves': hp.choice('num_leaves', range(2, 64)),
    'bagging_freq': hp.choice('bagging_freq', range(1, 80)),
    # Fractions sampled log-uniformly on (0.1, 1].
    'feature_fraction': hp.loguniform('feature_fraction', np.log(0.1), np.log(1)),
    'bagging_fraction': hp.loguniform('bagging_fraction', np.log(0.1), np.log(1)),
    # Regularisation terms sampled uniformly on [0, 1].
    'lambda_l1': hp.uniform('lambda_l1', 0.0, 1.0),
    'lambda_l2': hp.uniform('lambda_l2', 0.0, 1.0),
    'min_split_gain': hp.loguniform('min_split_gain', np.log(0.1), np.log(1)),
    # Learning rate sampled log-uniformly on [0.005, 0.6].
    'learning_rate': hp.loguniform('learning_rate', np.log(0.005), np.log(0.6)),
}
# Configure the TPE search algorithm: 10 random start-up trials before the
# tree-structured Parzen estimator takes over (the default is 20).
algo = partial(tpe.suggest, n_startup_jobs=10)

MAX_EVALS = 200  # total number of hyperparameter samples to evaluate

# Run the optimisation.
# BUG FIX: the original called fmin(..., algo=tpe.suggest), silently
# discarding the configured `algo` partial above; pass it instead.
best = fmin(fn=objective, space=space, algo=algo, max_evals=MAX_EVALS)

# `best` holds indices for hp.choice dimensions; space_eval maps them back
# to the actual parameter values.
params11 = space_eval(space, best)
print(space_eval(space, best))