在 XGBoost 调参过程中,可以使用 GridSearchCV() 进行网格搜索,自动遍历参数组合,而不必繁琐地手动调参。
下面的例子演示了在使用 XGBoost 完成回归任务时如何使用 GridSearchCV()。
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV #网格搜索
from sklearn.model_selection import train_test_split,KFold #数据集划分,交叉验证
import xgboost as xgb #xgboost
import warnings
# Grid-search XGBoost regression hyper-parameters with 5-fold cross-validation.
# Silence every warning for the whole process so the search log stays readable.
warnings.filterwarnings('ignore')

goal = 'people_index'  # name of the target column to predict
myid = 'ID'  # name of the identifier column, removed from the features

# Load the training data; the first CSV column becomes the DataFrame index.
train = pd.read_csv('D:/Xgboost/train.csv', index_col=0)
# Fit on log(1 + target) rather than on the raw target values.
y_train = np.log1p(train[goal])
# NOTE(review): because of index_col=0, this drop raises KeyError if 'ID' is
# the first CSV column (it would be the index, not a column) - confirm layout.
train.drop([myid, goal], axis=1, inplace=True)
x = train
y = y_train

seed = 7
test_size = 0.10
# Hold out 10% of the rows; only the remaining 90% takes part in the search.
# NOTE(review): X_test / Y_test are not used anywhere in the visible code.
X_train, X_test, Y_train, Y_test = train_test_split(
    x, y, test_size=test_size, random_state=seed
)

model = xgb.XGBRegressor()

# Candidate values: 3 * 4 * 4 = 48 combinations, each fitted once per fold.
learning_rate = [0.01, 0.05, 0.1]
n_estimators = [700, 900, 1100, 1300]
max_depth = [6, 10, 15, 20]
param_grid = {
    'learning_rate': learning_rate,
    'n_estimators': n_estimators,
    'max_depth': max_depth,
}

# Shuffled 5-fold CV. random_state is fixed so the folds (and therefore the
# scores) are the same on every run, matching the seeded train/test split.
# The variable name keeps its original spelling in case later code uses it.
kflod = KFold(n_splits=5, shuffle=True, random_state=seed)

print("Begin Train")
# 'neg_mean_squared_error' is the negated MSE, so a larger (closer to zero)
# score is better; n_jobs=-1 evaluates candidates on all available cores.
grid_search = GridSearchCV(
    model,
    param_grid,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
    cv=kflod,
)
# fit() returns the fitted search object itself, so grid_result is grid_search.
grid_result = grid_search.fit(X_train, Y_train)

print("Best: %f using %s" % (grid_result.best_score_, grid_search.best_params_))

# Report the mean cross-validated score of every parameter combination.
means = grid_result.cv_results_['mean_test_score']
params = grid_result.cv_results_['params']
for mean, param in zip(means, params):
    print("%f with: %r" % (mean, param))