LightGBM sklearn API：目前已有如下自定义评估函数（custom eval metric）：
def self_metric1(y_true, y_predict):
    """LightGBM-style custom eval metric: weighted recall at low FPR.

    Ranks samples by predicted score (descending) and reads the true
    positive rate (TPR) at the cut-offs where the false positive rate
    (FPR) is closest to 0.1%, 0.5% and 1%, then blends them with
    weights 0.4 / 0.3 / 0.3.

    Parameters
    ----------
    y_true : array-like of 0/1 labels.
    y_predict : array-like of predicted scores / probabilities, same length.

    Returns
    -------
    tuple
        ``(metric_name, value, is_higher_better)`` as required by the
        LightGBM sklearn-API ``eval_metric`` contract.
    """
    y = np.asarray(y_true)
    score = np.asarray(y_predict)

    # Rank samples from highest to lowest score; a stable sort keeps a
    # deterministic order for tied scores.
    order = np.argsort(-score, kind="stable")
    y_sorted = y[order]

    pos_all = y_sorted.sum()
    neg_all = len(y_sorted) - pos_all
    # Guard: an eval slice containing a single class would otherwise divide
    # by zero (the original pandas version raised KeyError here and killed
    # the training loop).
    if pos_all == 0 or neg_all == 0:
        return "self_metric", 0.0, True

    # Cumulative positives / negatives among the top-k ranked predictions.
    p_cum = np.cumsum(y_sorted)
    n_cum = np.arange(1, len(y_sorted) + 1) - p_cum

    tpr = p_cum / pos_all
    fpr = n_cum / neg_all

    # TPR at the cut-off whose FPR is nearest each target; argmin picks the
    # first (earliest) cut-off on ties, matching the original idxmin.
    tr1 = tpr[np.abs(fpr - 0.001).argmin()]
    tr2 = tpr[np.abs(fpr - 0.005).argmin()]
    tr3 = tpr[np.abs(fpr - 0.01).argmin()]
    return "self_metric", 0.4 * tr1 + 0.3 * tr2 + 0.3 * tr3, True
使用方法:
# Fit with the custom metric. ``metric=None`` on the estimator disables
# LightGBM's built-in eval metrics so that the ``eval_metric`` passed to
# fit() below is the only metric reported and used for early stopping.
clf = LGBMClassifier(
learning_rate=0.05,
n_estimators=10230,
num_leaves=31,
subsample=0.8,
colsample_bytree=0.8,
random_state=1023,
metric=None
)
# NOTE(review): ``early_stopping_rounds`` / ``verbose`` as fit() keyword
# arguments were removed in lightgbm >= 4.0 — newer versions require
# ``callbacks=[lgb.early_stopping(200), lgb.log_evaluation(200)]`` instead;
# confirm against the installed lightgbm version.
clf.fit(
trn_x, trn_y,
eval_set=[(val_x, val_y)],
# categorical_feature=cate_cols
eval_metric=self_metric1,#eval_metric=lambda y_true, y_pred: [self_metric1(y_true, y_pred)],
early_stopping_rounds=200,
verbose=200
)
xgboost 的用法类似（注意：xgboost 自定义 eval_metric 的签名与返回约定略有不同——通常返回 (name, value) 二元组，是否“越大越好”由单独的 maximize 参数指定，需按所用版本的文档确认）。