# LightGBM hyper-parameters for the regression CV run below.
# NOTE: the original dict set both 'min_data_in_leaf' and 'min_child_samples',
# which are two alias names for the SAME LightGBM parameter — LightGBM warns
# and keeps only one. The duplicate alias is removed; the canonical
# 'min_data_in_leaf' (value 20, identical to what the alias carried) remains.
params = {
    "objective": "regression",        # L2 regression task
    "metric": "rmse",                 # evaluation metric for valid sets
    "boosting": "gbdt",               # standard gradient-boosted trees
    "learning_rate": 0.01,            # small step size; paired with large num_round + early stopping
    "num_leaves": 31,                 # max leaves per tree (default-ish complexity)
    "min_data_in_leaf": 20,           # min samples per leaf (canonical name; alias removed)
    "feature_fraction": 0.8,          # column subsampling per tree
    "bagging_fraction": 0.85,         # row subsampling ...
    "bagging_freq": 1,                # ... performed every iteration
    "bagging_seed": 23,               # seed for the bagging RNG
    "lambda_l1": 0.2,                 # L1 regularization on leaf weights
    "nthread": 4,                     # CPU threads
}
# 5-fold cross-validated LightGBM training.
# Produces three module-level results used downstream:
#   oof_lgb               — out-of-fold predictions over the full train set
#   predictions_lgb       — test predictions averaged across the 5 folds
#   feature_importance_df — per-fold split importances, stacked row-wise
folds = KFold(n_splits=5, shuffle=True, random_state=2333)
oof_lgb = np.zeros(len(train))
predictions_lgb = np.zeros(len(test))
feature_importance_df = pd.DataFrame()

for fold_id, (fit_idx, holdout_idx) in enumerate(folds.split(train.values, target.values)):
    print("fold {}".format(fold_id))
    fit_set = lgb.Dataset(
        train.iloc[fit_idx],
        label=target.iloc[fit_idx],
        categorical_feature=categorical_feats,
    )
    holdout_set = lgb.Dataset(
        train.iloc[holdout_idx],
        label=target.iloc[holdout_idx],
        categorical_feature=categorical_feats,
    )
    # Generous iteration cap; early stopping on the holdout fold picks the
    # effective number of rounds (best_iteration) used for prediction.
    booster = lgb.train(
        params,
        fit_set,
        10000,
        valid_sets=[fit_set, holdout_set],
        verbose_eval=500,
        early_stopping_rounds=200,
    )
    oof_lgb[holdout_idx] = booster.predict(
        train.iloc[holdout_idx], num_iteration=booster.best_iteration
    )

    # Record this fold's importances (columns in the same order as before:
    # feature, importance, fold).
    fold_imp = pd.DataFrame(
        {
            "feature": features,
            "importance": booster.feature_importance(),
            "fold": fold_id + 1,
        }
    )
    feature_importance_df = pd.concat([feature_importance_df, fold_imp], axis=0)

    # Each fold contributes an equal 1/n_splits share of the test prediction.
    predictions_lgb += booster.predict(test, num_iteration=booster.best_iteration) / folds.n_splits

print("CV Score: {:<8.5f}".format(r2_score(target, oof_lgb)))
fold 0
Training until validation scores don't improve for 200 rounds.
D:\app\anacode\anaconda\lib\site-packages\lightgbm\basic.py:1184: UserWarning: Using categorical_feature in Dataset.
warnings.warn('Using categorical_feature in Dataset.')
D:\app\anacode\anaconda\lib\site-packages\lightgbm\basic.py:742: UserWarning: categorical_feature in param dict is overridden.
warnings.warn('categorical_feature in param dict is overridden.')
[500] training's rmse: 810.06 valid_1's rmse: 928.603
[1000] training's rmse: 712.357 valid_1's rmse: 907.149
[1500] training's rmse: 659.304 valid_1's rmse: 903.82
Early stopping, best iteration is:
[1542] training's rmse: 655.892 valid_1's rmse: 903.404
fold 1
Training until validation scores don't improve for 200 rounds.
D:\app\anacode\anaconda\lib\site-packages\lightgbm\basic.py:1184: UserWarning: Using categorical_feature in Dataset.
warnings.warn('Using categorical_feature in Dataset.')
D:\app\anacode\anaconda\lib\site-packages\lightgbm\basic.py:742: UserWarning: categorical_feature in param dict is overridden.
warnings.warn('categorical_feature in param dict is overridden.')
房租赛-模型测试
最新推荐文章于 2023-03-15 16:16:35 发布
![](https://img-home.csdnimg.cn/images/20240711042549.png)