# 1. XGBoost建模,sklearn评估
# 2. 网格搜索交叉验证找最优参数
# 3. early-stop早停止
# 4. 特征重要度
# 5. 并行训练加速
#预估器建模方式:sklearn与XGboost配合使用
#xgboost建模,sklearn评估
import pickle
import xgboost as xgb
import numpy as np
from sklearn.model_selection import KFold, train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix, mean_squared_error
from sklearn.datasets import load_iris, load_digits, load_boston
# Seeded random state so the shuffled cross-validation splits are reproducible.
rng = np.random.RandomState(31337)

# Binary classification: print a confusion matrix for each cross-validation fold.
print("数字0和1的二分类问题")
# n_class is passed by keyword: recent scikit-learn releases declare the
# parameters of load_digits as keyword-only, so load_digits(2) raises TypeError.
digits = load_digits(n_class=2)  # restrict the dataset to its first two classes
y = digits['target']
X = digits['data']
# Splitter object: two shuffled folds driven by the seeded random state.
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
print("在2着数据上的交叉验证")
# 2-fold cross-validation: fit a fresh classifier on each training split and
# evaluate it on the held-out split.
for train_index, test_index in kf.split(X):
    # Initialise an XGBoost classifier with default settings and fit it.
    xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
    # Predict labels for the held-out samples.
    predictions = xgb_model.predict(X[test_index])
    actuals = y[test_index]
    print("混淆矩阵")
    print(confusion_matrix(actuals, predictions))
#多分类,混淆矩阵
print('\nIris:多分类'