LightGBM(lgb)自定义评估函数(feval)
模板:
# Signature: f(preds: array, train_data: Dataset) -> (name: str, eval_result: float, is_higher_better: bool)
def f(preds, train_data):
    """Skeleton of a LightGBM custom evaluation function (``feval``).

    Copy this function, rename it, and replace the placeholder values below
    with the real metric computation.

    :param preds: array, the predicted values
    :param train_data: lgb Dataset, the dataset handed in by LightGBM
    :return: tuple ``(metric_name, eval_result, is_higher_better)`` where
        ``metric_name`` is a str, ``eval_result`` a float and
        ``is_higher_better`` a bool
    """
    # Placeholders only: a real metric derives ``eval_result`` from ``preds``
    # and the labels stored in ``train_data``.
    metric_name = 'metric_name'
    eval_result = 0.0
    is_higher_better = True
    # Return (metric name, metric value, whether a larger value is better).
    return metric_name, eval_result, is_higher_better
示例:
二分类 accuracy:
def accuracy(preds, train_data):
    """Binary-classification accuracy usable as a LightGBM ``feval``.

    :param preds: array of predicted scores; a sigmoid is applied to them
        before thresholding at 0.5
    :param train_data: object exposing ``get_label()`` (an lgb Dataset)
    :return: tuple ``('accuracy', mean accuracy, True)``
    """
    labels = train_data.get_label()
    # The sigmoid treats ``preds`` as raw scores.
    # NOTE(review): if LightGBM already hands over probabilities (which may be
    # the case with a built-in ``binary`` objective), this step should be
    # dropped -- confirm against the objective in use.
    probabilities = 1. / (1. + np.exp(-preds))
    predicted_positive = probabilities > 0.5
    return 'accuracy', np.mean(labels == predicted_positive), True
多分类 f1_score:
# Multi-class macro F1 score (the original example targets 14 classes)
def MultiAuc_f1score(preds, train_data, num_class=14):
    """Macro-averaged F1 score usable as a LightGBM ``feval`` for multi-class.

    :param preds: predicted scores, either a flat 1-D array laid out class by
        class (all samples of class 0, then class 1, ...) or a 2-D array of
        shape ``(n_samples, num_class)``
    :param train_data: object exposing ``get_label()`` (an lgb Dataset)
    :param num_class: number of classes, used only to unflatten 1-D ``preds``;
        defaults to 14 to match the original example
    :return: tuple ``('MultiAuc_f1score', macro F1, True)``
    """
    # Local import keeps this snippet self-contained.
    from sklearn.metrics import f1_score

    y_label = train_data.get_label()
    preds = np.asarray(preds)
    if preds.ndim == 1:
        # Flat class-major layout -> (n_samples, num_class).
        preds = preds.reshape(num_class, -1).T
    # Otherwise ``preds`` is assumed to be (n_samples, num_class) already;
    # NOTE(review): newer LightGBM releases are understood to pass this 2-D
    # layout -- confirm for the installed version.
    y_pred = np.argmax(preds, axis=-1)
    # scikit-learn's signature is f1_score(y_true, y_pred, ...).
    multi_f1_score = f1_score(y_label, y_pred, average='macro')
    return 'MultiAuc_f1score', multi_f1_score, True
测试案例:
import sklearn
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score,mean_squared_error
import numpy as np
from matplotlib import pyplot as plt
import lightgbm as lgb
import pickle
from scipy.stats import rankdata
# Load scikit-learn's breast-cancer dataset (a binary-classification task).
breast_cancer = datasets.load_breast_cancer()
data = breast_cancer.data
target = breast_cancer.target
# Split into training and test sets.
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=1024)
print("Train data length:", len(X_train))
print("Test data length:", len(X_test))
# Wrap the arrays in LightGBM Dataset objects.
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
# Training parameters.
params = {
    'boosting_type': 'gbdt',  # boosting type
    'objective': 'binary',  # objective function
    'metric': {'auc'},  # built-in evaluation metric
    'num_leaves': 64,  # number of leaves per tree
    'learning_rate': 0.05,  # learning rate
    'nthread': 20,
    'verbose': -1,  # <0: fatal only, =0: errors (warnings), >0: info
}
### custom metric test
print("custom metric test ")
# Custom metric: AUC computed with scikit-learn
def custom_auc(preds, train_data):
    """ROC AUC usable as a LightGBM ``feval``.

    :param preds: array of predicted scores
    :param train_data: object exposing ``get_label()`` (an lgb Dataset)
    :return: tuple ``('my_auc', ROC AUC, True)``
    """
    labels = train_data.get_label()
    # No sigmoid is applied: AUC depends only on the ordering of the scores,
    # so a monotonic transform would not change the result.
    return 'my_auc', roc_auc_score(labels, preds), True
evals_result = {}  # filled by the record_evaluation callback
# Early stopping, result recording and per-iteration logging go through
# callbacks: the ``early_stopping_rounds`` / ``evals_result`` keyword arguments
# are no longer accepted by ``lgb.train`` in LightGBM 4.x.
# NOTE: ``lgb.log_evaluation`` needs LightGBM >= 3.3.
gbm = lgb.train(
    params,
    lgb_train,
    num_boost_round=100,
    valid_sets=[lgb_eval],
    feval=custom_auc,
    callbacks=[
        lgb.early_stopping(stopping_rounds=5),
        lgb.record_evaluation(evals_result),
        lgb.log_evaluation(period=1),
    ],
)
# Predict with the best iteration found by early stopping.
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)  # shape [B]
执行结果:
custom metric test
[1] valid_0's auc: 0.965602 valid_0's my_auc: 0.965602
Training until validation scores don't improve for 5 rounds.
[2] valid_0's auc: 0.966304 valid_0's my_auc: 0.966304
[3] valid_0's auc: 0.965953 valid_0's my_auc: 0.967883
[4] valid_0's auc: 0.96683 valid_0's my_auc: 0.96683
[5] valid_0's auc: 0.962092 valid_0's my_auc: 0.962092
[6] valid_0's auc: 0.964549 valid_0's my_auc: 0.964549
[7] valid_0's auc: 0.964022 valid_0's my_auc: 0.964022
[8] valid_0's auc: 0.962969 valid_0's my_auc: 0.962969
Early stopping, best iteration is:
[3] valid_0's auc: 0.965953 valid_0's my_auc: 0.967883
自定义的 AUC 与 LightGBM 自带的 AUC 评估结果基本一致;除了第 3 轮迭代(epoch=3)时两者对不上,可能是 LightGBM 内部实现细节不同导致的。