# Optuna hyper-parameter tuning for LightGBM with a custom evaluation metric
# that combines edge-sample (top/bottom 1%) separability with the overall KS statistic.

import lightgbm as lgb
import numpy as np
import optuna
from sklearn.metrics import roc_auc_score, recall_score

# 自定义评价函数:edge_separation_and_ks
def edge_separation_and_ks(y_true, y_pred):
    # 计算得分最高和最低的1%样本的预测概率阈值
    num_samples = len(y_pred)
    num_edge_samples = int(num_samples * 0.01)
    cutoff_threshold_high = sorted(y_pred)[::-1][num_edge_samples]
    cutoff_threshold_low = sorted(y_pred)[num_edge_samples]

    # 将预测概率在两个阈值之间的样本视为不确定性样本
    uncertainty_mask = (y_pred > cutoff_threshold_low) & (y_pred < cutoff_threshold_high)

    # 计算得分高和低的1%样本的平均真实标签分数差
    y_true_sorted = [t for _, t in sorted(zip(y_pred, y_true))]
    high_score_samples = y_true_sorted[:num_edge_samples]
    low_score_samples = y_true_sorted[-num_edge_samples:]
    score_diff = sum(high_score_samples) / num_edge_samples - sum(low_score_samples) / num_edge_samples

    # 计算KS值
    fpr, tpr, thresholds = roc_curve(y_true, y_pred)
    ks = max(tpr - fpr)

    # 综合考虑Edge Separation和KS作为评价值
    return 'edge_separation_and_ks', score_diff * (1 + uncertainty_mask.mean()) * ks, True

# Data preparation: wrap the feature matrices and label vectors in LightGBM
# Dataset objects for training/validation.
# NOTE(review): X_train / y_train / X_val / y_val are not defined anywhere in
# this file — presumably produced by an earlier data-loading step; confirm upstream.
train_data = lgb.Dataset(X_train, label=y_train)
val_data = lgb.Dataset(X_val, label=y_val)

# Optuna objective: train one LightGBM model and maximize edge_separation_and_ks
# on the validation set.
def maximize_edge_separation_and_ks(trial):
    """Optuna objective returning the validation edge-separation-and-KS value.

    Parameters
    ----------
    trial : optuna.Trial
        Supplies the hyper-parameter suggestions.

    Returns
    -------
    float
        Metric value on the validation set (higher is better).
    """
    # Hyper-parameter search space. suggest_float replaces the deprecated
    # suggest_loguniform / suggest_uniform APIs.
    params = {
        'boosting_type': 'gbdt',
        'objective': 'binary',
        'metric': 'None',  # disable built-in metrics; rely solely on the custom feval
        'num_leaves': trial.suggest_int('num_leaves', 16, 128),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1, log=True),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.6, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.6, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 10),
        'verbose': -1
    }

    # BUGFIX: LightGBM invokes feval as (raw_preds, Dataset); the original
    # passed edge_separation_and_ks directly, so it received predictions as
    # y_true and a Dataset object as y_pred. Adapt the signature here.
    def _feval(preds, eval_data):
        return edge_separation_and_ks(eval_data.get_label(), preds)

    # early_stopping_rounds / verbose_eval / evals_result keyword arguments
    # were removed in LightGBM 4.x; use callbacks instead.
    model = lgb.train(params,
                      train_data,
                      valid_sets=[train_data, val_data],
                      num_boost_round=5000,
                      feval=_feval,
                      callbacks=[lgb.early_stopping(100, verbose=False)])

    # Score the validation set with the best (early-stopped) iteration.
    y_pred_val = model.predict(X_val, num_iteration=model.best_iteration)
    return edge_separation_and_ks(y_val, y_pred_val)[1]

# Create the Optuna study and run the hyper-parameter search.
study = optuna.create_study(direction='maximize')
study.optimize(maximize_edge_separation_and_ks, n_trials=50)

# Report the best hyper-parameters found.
print('Best Parameters: ', study.best_params)

# BUGFIX: study.best_params contains only the *suggested* parameters; the fixed
# ones (objective, boosting_type, metric, verbose) must be merged back in,
# otherwise the final model would train with LightGBM's default regression
# objective instead of binary classification.
_best_params = {
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': 'None',
    'verbose': -1,
    **study.best_params,
}


# LightGBM invokes feval as (raw_preds, Dataset); adapt to the
# (y_true, y_pred) signature of edge_separation_and_ks.
def _final_feval(preds, eval_data):
    return edge_separation_and_ks(eval_data.get_label(), preds)


# Retrain with the best parameters. early_stopping_rounds / verbose_eval /
# evals_result kwargs were removed in LightGBM 4.x; use callbacks instead.
best_model = lgb.train(_best_params,
                       train_data,
                       valid_sets=[train_data, val_data],
                       num_boost_round=5000,
                       feval=_final_feval,
                       callbacks=[lgb.early_stopping(100, verbose=False)])

# Evaluate the tuned model on the held-out test set.
y_pred_test = best_model.predict(X_test, num_iteration=best_model.best_iteration)
edge_separation_and_ks_test = edge_separation_and_ks(y_test, y_pred_test)[1]
print('Edge Separation and KS on Test Set: {:.4f}'.format(edge_separation_and_ks_test))

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

田晖扬

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值