import lightgbm as lgb
import numpy as np
import pandas as pd
def lgb_model(train, target, test, k, seed):
    """Train LightGBM binary classifiers with stratified K-fold CV.

    Parameters
    ----------
    train : pd.DataFrame
        Training frame; every column except 'zhdh' (id) and 'black_flag'
        (label) is used as a feature.
    target : pd.Series
        Binary labels aligned with ``train`` (positional .iloc indexing).
    test : pd.DataFrame
        Frame to score; must contain the same feature columns.
    k : int
        Number of stratified folds.
    seed : int
        Random seed passed to the booster ('seed' parameter).

    Returns
    -------
    tuple
        (test predictions averaged over folds and seeds,
         out-of-fold probabilities for ``train``,
         mean per-fold validation AUC,
         per-fold gain feature-importance DataFrame)
    """
    # 'zhdh' is an id column and 'black_flag' the label; everything else is a feature.
    feats = [f for f in train.columns if f not in ['zhdh', 'black_flag']]
    print('Current num of features:', len(feats))
    oof_probs = np.zeros((train.shape[0],))
    output_preds = 0
    offline_score = []
    feature_importance_df = pd.DataFrame()
    parameters = {
        'boosting_type': 'gbdt',
        'objective': 'binary',
        'tree_learner': 'serial',
        'metric': 'auc',
        'min_child_weight': 4,
        'num_leaves': 64,
        'feature_fraction': 0.8,
        'bagging_fraction': 0.8,
        'bagging_freq': 4,
        'learning_rate': 0.02,
        'seed': seed,
        # BUGFIX: the original set both 'nthread' (32) and 'n_jobs' (8) —
        # conflicting aliases of LightGBM's 'num_threads' — plus an invalid
        # 'silent' key. Use the canonical name once.
        'num_threads': 8,
        'verbose': -1,
    }
    # NOTE(review): the CV seed list is hard-coded and independent of the
    # `seed` argument, which only controls the booster — confirm intentional.
    seeds = [2]
    for cv_seed in seeds:
        folds = StratifiedKFold(n_splits=k, shuffle=True, random_state=cv_seed)
        for i, (train_index, test_index) in enumerate(folds.split(train, target)):
            train_y, test_y = target.iloc[train_index], target.iloc[test_index]
            train_X = train[feats].iloc[train_index, :]
            test_X = train[feats].iloc[test_index, :]
            dtrain = lgb.Dataset(train_X, label=train_y)
            dval = lgb.Dataset(test_X, label=test_y)
            # Renamed from `lgb_model`, which shadowed the enclosing function.
            model = lgb.train(
                parameters,
                dtrain,
                num_boost_round=8000,
                valid_sets=[dval],
                callbacks=[lgb.early_stopping(100), lgb.log_evaluation(100)],
            )
            # BUGFIX: accumulate (+=) so averaging over several seeds works;
            # plain assignment kept only the last seed's fold predictions
            # while still dividing by len(seeds), deflating the OOF scores.
            oof_probs[test_index] += model.predict(
                test_X, num_iteration=model.best_iteration) / len(seeds)
            offline_score.append(model.best_score['valid_0']['auc'])
            output_preds += model.predict(
                test[feats], num_iteration=model.best_iteration) / folds.n_splits / len(seeds)
            print(offline_score)
            # Per-fold feature importance (gain), stacked across folds/seeds.
            fold_importance_df = pd.DataFrame()
            fold_importance_df["feature"] = feats
            fold_importance_df["importance"] = model.feature_importance(importance_type='gain')
            fold_importance_df["fold"] = i + 1
            feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)
    print('OOF-MEAN-AUC:%.6f, OOF-STD-AUC:%.6f' % (np.mean(offline_score), np.std(offline_score)))
    print('feature importance:')
    print(feature_importance_df.groupby(['feature'])['importance'].mean().sort_values(ascending=False).head(50))
    return output_preds, oof_probs, np.mean(offline_score), feature_importance_df
# Kick off cross-validated training and collect predictions / diagnostics.
print('开始模型训练train')
lgb_preds, lgb_oof, lgb_score, feature_importance_df = lgb_model(
    train=train_df[feature_cols],
    target=train_df['target'],
    test=test_df[feature_cols],
    k=5,
    seed=2020,
)
# Scraped-page footer (CSDN tag "LIGHTGBM"; latest recommended article
# published 2024-06-18 08:34:35) — kept as a comment so the file parses.