# SklearnCv: xgboost cross-validation demo using the scikit-learn API
# Runs the example program shipped with the xgboost package
from xgboost import XGBClassifier
# Module for loading data in LibSVM format
from sklearn.datasets import load_svmlight_file
#from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from matplotlib import pyplot
# Read in data. The files come from the demo directory of the xgboost
# installation and have been copied into ./data next to this script.
my_workpath = './data/'
train_file = my_workpath + 'agaricus.txt.train'
test_file = my_workpath + 'agaricus.txt.test'
X_train, y_train = load_svmlight_file(train_file)
X_test, y_test = load_svmlight_file(test_file)
# --- Model training parameters (currently unused by the sklearn API call
# below; kept for reference) ---
# specify parameters via map
param = {'max_depth': 2, 'eta': 1, 'silent': 0, 'objective': 'binary:logistic'}
print(param)
# --- Build the model ---
# Number of boosting iterations (kept small so the demo runs quickly).
num_round = 2
# NOTE(review): `silent` was removed in newer xgboost releases in favour of
# `verbosity` — confirm the installed version still accepts it.
bst = XGBClassifier(max_depth=2, learning_rate=0.1, n_estimators=num_round,
                    silent=True, objective='binary:logistic')
# --- Cross validation (this can be slow) ---
# Stratified k-fold CV evaluation of the xgboost model.
# shuffle=True is required for random_state to take effect; scikit-learn
# >= 0.24 raises a ValueError if random_state is set while shuffle is False.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)
results = cross_val_score(bst, X_train, y_train, cv=kfold)
print(results)
# Report mean accuracy and its standard deviation across the 10 folds.
print("CV Accuracy: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))