import os
import warnings

import lightgbm as lgb
import numpy as np
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import StratifiedKFold

warnings.filterwarnings('ignore')
# Rows with a known label form the training set; rows with a missing
# label are the (unlabeled) test set to be predicted.
label_missing = data['label'].isna()
X_train = data[~label_missing]
X_test = data[label_missing]
y = X_train['label']
# Stratified 5-fold CV preserves the class ratio within every fold.
KF = StratifiedKFold(n_splits=5, shuffle=True, random_state=2021)
# LightGBM hyperparameters for binary classification, evaluated by AUC.
# (A stale, commented-out alternative parameter dict was removed here;
# it was dead code and its name `params` was no longer defined anywhere.)
parameters = {
    'learning_rate': 0.05,
    'boosting_type': 'gbdt',        # standard gradient-boosted trees
    'objective': 'binary',
    'metric': 'auc',                # early stopping monitors validation AUC
    'num_leaves': 32,
    'feature_fraction': 0.8,        # column subsampling per tree
    'bagging_fraction': 0.8,        # row subsampling ...
    'bagging_freq': 5,              # ... re-drawn every 5 iterations
    'seed': 2020,
    'bagging_seed': 1,
    'feature_fraction_seed': 7,
    'min_data_in_leaf': 20,
    'n_jobs': -1,                   # use all CPU cores
    'verbose': -1,                  # silence per-iteration logging
}
# Out-of-fold prediction buffer over the training set; each fold fills
# the slots of its own validation indices exactly once.
oof_lgb = np.zeros(len(X_train))
# Make sure the output directory exists before save_model() is called.
os.makedirs('model', exist_ok=True)
for fold_, (trn_idx, val_idx) in enumerate(KF.split(X_train.values, y.values)):
    print("fold n°{}".format(fold_))
    trn_data = lgb.Dataset(X_train.iloc[trn_idx][features], label=y.iloc[trn_idx])
    val_data = lgb.Dataset(X_train.iloc[val_idx][features], label=y.iloc[val_idx])
    num_round = 10000
    clf = lgb.train(
        parameters,  # BUG FIX: was `params`, which is undefined (only the dict named `parameters` exists) -> NameError
        trn_data,
        num_round,
        valid_sets=[trn_data, val_data],
        verbose_eval=500,            # NOTE(review): removed in lightgbm>=4; use lgb.log_evaluation callback there
        early_stopping_rounds=100,   # NOTE(review): removed in lightgbm>=4; use lgb.early_stopping callback there
    )
    # Score the held-out fold with the best iteration found by early stopping.
    oof_lgb[val_idx] = clf.predict(X_train.iloc[val_idx][features], num_iteration=clf.best_iteration)
    clf.save_model(f'model/model_{fold_}.txt')
# Report out-of-fold metrics. AUC uses the raw probabilities; the class
# metrics need hard labels, so threshold at 0.5 ONCE (the original
# rebuilt the same list comprehension three times).
oof_pred = (oof_lgb >= 0.5).astype(int)
print("AUC score: {}".format(roc_auc_score(y, oof_lgb)))
print("F1 score: {}".format(f1_score(y, oof_pred)))
print("Precision score: {}".format(precision_score(y, oof_pred)))
print("Recall score: {}".format(recall_score(y, oof_pred)))
# lightGBM 分类模型 (LightGBM classification model)
# 最新推荐文章于 2024-04-29 20:00:00 发布 (scraped blog footer: "latest recommended article published 2024-04-29 20:00:00")