from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.datasets import make_hastie_10_2
from sklearn.ensemble import GradientBoostingClassifier
from xgboost.sklearn import XGBClassifier
from sklearn.model_selection import GridSearchCV
# Load the example dataset: 10-feature synthetic data from Hastie et al.
X, y = make_hastie_10_2(random_state=0)
# test_size is the fraction of samples held out for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=0)

# Baseline: XGBoost with default hyper-parameters.
# (The original bare text line "默认xgbt参数" was a SyntaxError — now a comment.)
auc_Score = []   # AUC recorded before tuning, after each tuning step, and after
accuracy = []    # accuracy recorded before and after tuning
clf = XGBClassifier()
clf.fit(X_train, y_train)
y_pre = clf.predict(X_test)
y_pro = clf.predict_proba(X_test)[:, 1]  # probability of the positive class
print("AUC Score : %f" % metrics.roc_auc_score(y_test, y_pro))
print("Accuracy : %.4g" % metrics.accuracy_score(y_test, y_pre))
auc_Score.append(metrics.roc_auc_score(y_test, y_pro))
accuracy.append(metrics.accuracy_score(y_test, y_pre))
# Step 1: tune max_depth and min_child_weight.
# (The original bare text line was a SyntaxError — now a comment.)
param_test1 = {
    'max_depth': range(3, 10),
    'min_child_weight': range(1, 12),
}
# NOTE: the `iid` argument was removed in scikit-learn 0.24 (its post-0.22
# default matched iid=False), so it is dropped here to keep the call working.
gsearch1 = GridSearchCV(
    estimator=XGBClassifier(
        learning_rate=0.1, n_estimators=140, max_depth=5,
        min_child_weight=1, gamma=0, subsample=0.8, colsample_bytree=0.8,
        objective='binary:logistic', nthread=4, scale_pos_weight=1, seed=27),
    param_grid=param_test1, scoring='roc_auc', n_jobs=4, cv=5)
gsearch1.fit(X_train, y_train)
# Record the best CV AUC, consistent with the later tuning steps.
auc_Score.append(gsearch1.best_score_)
# A bare expression displays nothing in a script, so print it explicitly.
print(gsearch1.best_params_, gsearch1.best_score_)
# Step 2: tune gamma (minimum loss reduction required to make a split).
# (The original bare text line was a SyntaxError — now a comment.)
param_test2 = {
    'gamma': [i / 10.0 for i in range(0, 5)],
}
# `iid` was removed in scikit-learn 0.24, so it is no longer passed.
gsearch2 = GridSearchCV(
    estimator=XGBClassifier(
        learning_rate=0.1, n_estimators=140, max_depth=9,
        min_child_weight=5, gamma=0, subsample=0.8, colsample_bytree=0.8,
        objective='binary:logistic', nthread=4, scale_pos_weight=1, seed=27),
    param_grid=param_test2, scoring='roc_auc', n_jobs=4, cv=5)
gsearch2.fit(X_train, y_train)
auc_Score.append(gsearch2.best_score_)
# A bare expression displays nothing in a script, so print it explicitly.
print(gsearch2.best_params_, gsearch2.best_score_)
# Step 3: tune subsample and colsample_bytree (row/column sampling rates).
# (The original bare text line was a SyntaxError — now a comment.)
param_test3 = {
    'subsample': [i / 10.0 for i in range(6, 10)],
    'colsample_bytree': [i / 10.0 for i in range(6, 10)],
}
# `iid` was removed in scikit-learn 0.24, so it is no longer passed.
gsearch3 = GridSearchCV(
    estimator=XGBClassifier(
        learning_rate=0.1, n_estimators=140, max_depth=9,
        min_child_weight=5, gamma=0.3, subsample=0.8, colsample_bytree=0.8,
        objective='binary:logistic', nthread=4, scale_pos_weight=1, seed=27),
    param_grid=param_test3, scoring='roc_auc', n_jobs=4, cv=5)
gsearch3.fit(X_train, y_train)
auc_Score.append(gsearch3.best_score_)
# A bare expression displays nothing in a script, so print it explicitly.
print(gsearch3.best_params_, gsearch3.best_score_)
# Step 4: tune reg_alpha (L1 regularization strength).
# (The original bare text line was a SyntaxError — now a comment.)
param_test4 = {
    'reg_alpha': [1e-5, 1e-2, 0.001, 0.005, 0.01, 0.05, 1, 100],
}
# `iid` was removed in scikit-learn 0.24, so it is no longer passed.
gsearch4 = GridSearchCV(
    estimator=XGBClassifier(
        learning_rate=0.1, n_estimators=140, max_depth=9,
        min_child_weight=5, gamma=0.3, subsample=0.8, colsample_bytree=0.7,
        objective='binary:logistic', nthread=4, scale_pos_weight=1, seed=27),
    param_grid=param_test4, scoring='roc_auc', n_jobs=4, cv=5)
gsearch4.fit(X_train, y_train)
auc_Score.append(gsearch4.best_score_)
# A bare expression displays nothing in a script, so print it explicitly.
print(gsearch4.best_params_, gsearch4.best_score_)
# Step 5: tune n_estimators (number of boosted trees).
# (The original bare text line was a SyntaxError — now a comment.)
param_test5 = {
    'n_estimators': [100, 140, 200, 500, 1000, 1500],
}
# `iid` was removed in scikit-learn 0.24, so it is no longer passed.
gsearch5 = GridSearchCV(
    estimator=XGBClassifier(
        learning_rate=0.1, n_estimators=140, max_depth=9,
        reg_alpha=1e-05,
        min_child_weight=5, gamma=0.3, subsample=0.8, colsample_bytree=0.7,
        objective='binary:logistic', nthread=4, scale_pos_weight=1, seed=27),
    param_grid=param_test5, scoring='roc_auc', n_jobs=4, cv=5)
gsearch5.fit(X_train, y_train)
auc_Score.append(gsearch5.best_score_)
# A bare expression displays nothing in a script, so print it explicitly.
print(gsearch5.best_params_, gsearch5.best_score_)
# Step 6: tune learning_rate.
# (The original heading said "n_estimators", but the grid below actually
# searches over learning_rate; it was also a bare-text SyntaxError line.)
param_test6 = {
    'learning_rate': [0.01, 0.02, 0.05, 0.1, 0.3],
}
# `iid` was removed in scikit-learn 0.24, so it is no longer passed.
gsearch6 = GridSearchCV(
    estimator=XGBClassifier(
        learning_rate=0.1, n_estimators=200, max_depth=9,
        reg_alpha=1e-05,
        min_child_weight=5, gamma=0.3, subsample=0.8, colsample_bytree=0.7,
        objective='binary:logistic', nthread=4, scale_pos_weight=1, seed=27),
    param_grid=param_test6, scoring='roc_auc', n_jobs=4, cv=5)
gsearch6.fit(X_train, y_train)
auc_Score.append(gsearch6.best_score_)
# A bare expression displays nothing in a script, so print it explicitly.
print(gsearch6.best_params_, gsearch6.best_score_)
# Final model: refit with the hyper-parameters found by the tuning steps
# above and evaluate on the held-out test set.
# (The original bare text line "最后" was a SyntaxError — now a comment.)
clf = XGBClassifier(
    learning_rate=0.1,          # default is 0.3
    n_estimators=200,           # number of boosted trees
    max_depth=9,
    min_child_weight=5,
    gamma=0.3,
    subsample=0.8,
    colsample_bytree=0.7,
    objective='binary:logistic',  # logistic-regression loss for binary labels
    nthread=4,                  # number of CPU threads
    reg_alpha=1e-05,            # L1 regularization
    scale_pos_weight=1,
    seed=27)                    # random seed for reproducibility
clf.fit(X_train, y_train)
y_pre = clf.predict(X_test)
y_pro = clf.predict_proba(X_test)[:, 1]  # probability of the positive class
print("AUC Score : %f" % metrics.roc_auc_score(y_test, y_pro))
print("Accuracy : %.4g" % metrics.accuracy_score(y_test, y_pre))
auc_Score.append(metrics.roc_auc_score(y_test, y_pro))
accuracy.append(metrics.accuracy_score(y_test, y_pre))
# TODO: add a feature-importance plot here (e.g. xgboost.plot_importance).
# Parameter guide: https://www.analyticsvidhya.com/blog/2016/03/complete-guide-parameter-tuning-xgboost-with-codes-python/
# Code: https://github.com/sleepingxin/code/blob/master/xgbt调参.ipynb
# Reference: https://github.com/lytforgood/MachineLearningTrick/blob/master/xgboost调参演示.md