XGBOOST(超参网格搜索)

import xgboost as xgb
from xgboost.sklearn import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
import pandas as pd
import return_data
from xgboost import plot_importance
from matplotlib import pyplot as plt
import warnings
warnings.filterwarnings("ignore")

# alg: the XGBoost estimator; X_train / Y_train: training features and labels;
# X_test / Y_test: optional held-out features and labels used for the report.
def XGBmodelfit(alg, X_train, Y_train, X_test=None, Y_test=None, X_predictions=None, useTrainCV=True, cv_folds=5, early_stopping_rounds=200):
    """Fit ``alg`` and print an accuracy / AUC report plus a feature-importance plot.

    Args:
        alg: Estimator exposing ``get_xgb_params``, ``get_params``,
            ``set_params``, ``fit``, ``predict`` and ``predict_proba``
            (the script passes an ``XGBClassifier``).
        X_train: Training features.
        Y_train: Training labels.
        X_test: Features to evaluate on. When this or ``Y_test`` is ``None``
            the report falls back to the training data.
        Y_test: Labels matching ``X_test``.
        X_predictions: Accepted for interface compatibility; not used here.
        useTrainCV: When true, run ``xgb.cv`` first and set ``n_estimators``
            on ``alg`` to the number of rows in the CV result.
        cv_folds: Number of folds passed to ``xgb.cv``.
        early_stopping_rounds: Early-stopping patience passed to ``xgb.cv``.

    Returns:
        None. Results are printed and the importance plot is shown.
    """
    # Stays None when cross-validation is skipped, so the report below never
    # touches an undefined CV result.
    best_rounds = None
    if useTrainCV:
        xgb_param = alg.get_xgb_params()
        xgtrain = xgb.DMatrix(X_train, label=Y_train)
        cvresult = xgb.cv(
            xgb_param,
            xgtrain,
            num_boost_round=alg.get_params()['n_estimators'],
            nfold=cv_folds,
            metrics='auc',
            early_stopping_rounds=early_stopping_rounds,
            show_stdv=False,
        )
        # One row per boosting round kept by xgb.cv.
        best_rounds = cvresult.shape[0]
        alg.set_params(n_estimators=best_rounds)

    # Train the model.
    # NOTE(review): recent xgboost releases expect eval_metric on the
    # estimator rather than in fit() - confirm against the installed version.
    alg.fit(X_train, Y_train, eval_metric='auc')

    # Evaluate on the held-out split when one is supplied; otherwise report on
    # the training data instead of failing on None inputs.
    if X_test is None or Y_test is None:
        eval_X, eval_Y, split_name = X_train, Y_train, "Train"
    else:
        eval_X, eval_Y, split_name = X_test, Y_test, "Test"

    predicted_labels = alg.predict(eval_X)             # hard 0 / 1 labels
    predicted_probs = alg.predict_proba(eval_X)[:, 1]  # positive-class probability

    # Print the report. AUC is a ranking metric, so it is computed from the
    # probabilities rather than from the thresholded labels.
    print("\nModel Report")
    print("Accuracy  (%s) : %.4g" % (split_name, metrics.accuracy_score(eval_Y, predicted_labels)))
    print("AUC Score (%s): %f" % (split_name, metrics.roc_auc_score(eval_Y, predicted_probs)))
    print(alg)
    if best_rounds is not None:
        print("the best:")
        print(best_rounds)
    plot_importance(alg)
    plt.show()

# Load the dataset from the project helper module
# (presumably features first, labels second - confirm against return_data).
dataset_X, dataset_Y = return_data.return_tarin_data()

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    dataset_X,
    dataset_Y,
    test_size=0.2,
    random_state=45,
)

# Baseline classifier intended for XGBmodelfit: n_estimators is set high
# because the CV step inside XGBmodelfit overwrites it.
xgb1 = XGBClassifier(
    learning_rate=0.1,
    n_estimators=1000,
    max_depth=5,
    min_child_weight=1,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='binary:logistic',
    nthread=4,
    scale_pos_weight=1,
    seed=27,
)

# Disabled by default; uncomment to tune n_estimators and print the report.
# XGBmodelfit(xgb1, X_train, y_train, X_test, y_test)

# Search grid: max_depth in 3, 5, 7, 9 and min_child_weight in 1, 3, 5.
param_grid = {
    'max_depth': range(3, 10, 2),
    'min_child_weight': range(1, 6, 2),
}
# Narrower follow-up grid kept for reference:
# param_grid = {
#     'max_depth': [7, 8],
#     'min_child_weight': [4, 5],
# }

# Exhaustive 5-fold grid search over the grid above. The estimator's own
# max_depth / min_child_weight values are overridden by each grid candidate.
# NOTE(review): no `scoring` is passed, so candidates are ranked by the
# estimator's own `score` method, while the rest of this script reports AUC -
# confirm that is intended.
gsearch1 = GridSearchCV(
    estimator=XGBClassifier(
        learning_rate=0.1,
        n_estimators=140,
        max_depth=9,
        min_child_weight=1,
        gamma=0,
        subsample=0.8,
        colsample_bytree=0.8,
        objective='binary:logistic',
        nthread=4,
        scale_pos_weight=1,
        seed=27,
    ),
    param_grid=param_grid,
    cv=5,
)
gsearch1.fit(X_train, y_train)

# Report the winning parameter combination and its mean cross-validated score.
print(gsearch1.best_params_, gsearch1.best_score_)

  • 1
    点赞
  • 22
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值