XGBOOST（超参网格搜索）

最新推荐文章于 2024-06-21 11:13:52 发布

天上飞下一毛雪

最新推荐文章于 2024-06-21 11:13:52 发布

阅读量8.2k

点赞数 1

分类专栏： Python

本文链接：https://blog.csdn.net/qq_39622065/article/details/80384319

版权

Python 专栏收录该内容

82 篇文章 3 订阅

订阅专栏

import xgboost as xgb
from xgboost.sklearn import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
import pandas as pd
import return_data
from xgboost import plot_importance
from matplotlib import pyplot as plt
import warnings
warnings.filterwarnings("ignore")

# Fit an XGBoost classifier (optionally tuning n_estimators with xgb.cv first)
# and print an evaluation report plus a feature-importance plot.
def XGBmodelfit(alg, X_train, Y_train,X_test=None,Y_test=None,X_predictions=None,useTrainCV=True, cv_folds=5, early_stopping_rounds=200):
    """Train ``alg`` on the training data and report accuracy and AUC.

    Args:
        alg: XGBoost scikit-learn estimator (e.g. ``XGBClassifier``). It is
            modified in place: ``n_estimators`` may be overwritten and the
            model is fitted.
        X_train: Training features.
        Y_train: Training labels.
        X_test: Features used for the report. When this or ``Y_test`` is
            ``None`` the report is computed on the training data instead.
        Y_test: Labels matching ``X_test``.
        X_predictions: Accepted for backward compatibility; currently unused.
        useTrainCV: When true, run ``xgb.cv`` first and set ``n_estimators``
            to the number of rounds in the returned history.
        cv_folds: Number of folds passed to ``xgb.cv``.
        early_stopping_rounds: Early-stopping patience passed to ``xgb.cv``.

    Returns:
        None. Results are printed and the importance plot is shown.
    """
    # Stays None when CV is skipped so the report below never touches an
    # undefined name.
    cvresult = None
    if useTrainCV:
        xgb_param = alg.get_xgb_params()
        xgtrain = xgb.DMatrix(X_train, label=Y_train)
        cvresult = xgb.cv(
            xgb_param,
            xgtrain,
            num_boost_round=alg.get_params()['n_estimators'],
            nfold=cv_folds,
            metrics='auc',
            early_stopping_rounds=early_stopping_rounds,
            show_stdv=False,
        )
        # One row per retained boosting round in the CV history.
        alg.set_params(n_estimators=cvresult.shape[0])

    # Train the model. No eval_set is supplied, so an eval_metric argument
    # would never be evaluated; it is omitted because recent XGBoost releases
    # no longer accept it in fit().
    alg.fit(X_train, Y_train)

    # Score on the held-out split when one is given, otherwise on the
    # training data, and label the report accordingly.
    if X_test is None or Y_test is None:
        X_eval, Y_eval, split_name = X_train, Y_train, "Train"
    else:
        X_eval, Y_eval, split_name = X_test, Y_test, "Test"

    eval_predictions = alg.predict(X_eval)              # hard 0 / 1 labels
    eval_predprob = alg.predict_proba(X_eval)[:, 1]     # positive-class probability

    # Print the report. AUC is a ranking metric, so it is computed from the
    # probabilities rather than from the thresholded labels.
    print("\nModel Report")
    print("Accuracy  (%s) : %.4g" % (split_name, metrics.accuracy_score(Y_eval, eval_predictions)))
    print("AUC Score (%s): %f" % (split_name, metrics.roc_auc_score(Y_eval, eval_predprob)))
    print(alg)
    if cvresult is not None:
        print("the best:")
        print(cvresult.shape[0])
    plot_importance(alg)
    plt.show()

    # feat_imp = pd.Series(alg.booster().get_fscore()).sort_values(ascending=False)
    # feat_imp.plot(kind='bar', title='Feature Importances')
    # plt.ylabel('Feature Importance Score')

# Load features and labels from the project's data helper (the function name
# is spelled this way in return_data).
dataset_X, dataset_Y = return_data.return_tarin_data()

# Hold out 20% of the samples; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    dataset_X, dataset_Y, test_size=0.2, random_state=45
)

# Settings common to both classifiers below.
_shared_xgb_settings = dict(
    learning_rate=0.1,
    min_child_weight=1,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='binary:logistic',
    nthread=4,
    scale_pos_weight=1,
    seed=27,
)

# Baseline model intended for XGBmodelfit (the call is left disabled).
xgb1 = XGBClassifier(n_estimators=1000, max_depth=5, **_shared_xgb_settings)

# XGBmodelfit(xgb1,X_train,y_train,X_test,y_test)

# Search space: max_depth in {3, 5, 7, 9} x min_child_weight in {1, 3, 5}.
param_grid = {
    'max_depth': range(3, 10, 2),
    'min_child_weight': range(1, 6, 2),
}
# Narrower alternative grid, kept for reference:
# param_grid = {
#  'max_depth':[7,8],
#  'min_child_weight':[4,5]
# }

# Exhaustive search over param_grid with 5-fold cross-validation.
_grid_base_model = XGBClassifier(
    n_estimators=140, max_depth=9, **_shared_xgb_settings
)
gsearch1 = GridSearchCV(estimator=_grid_base_model, param_grid=param_grid, cv=5)
gsearch1.fit(X_train, y_train)
print(gsearch1.best_params_, gsearch1.best_score_)

天上飞下一毛雪

关注

1
点赞
踩
22

收藏

觉得还不错? 一键收藏
0
评论
XGBOOST（超参网格搜索）

import xgboost as xgb; from xgboost.sklearn import XGBClassifier; from sklearn.model_selection import train_test_split; from sklearn import metrics; from sklearn.model_selection import GridSearchCV; import...
复制链接

扫一扫

专栏目录