XGBoost

最新推荐文章于 2024-05-13 14:24:10 发布

青枫冥月

最新推荐文章于 2024-05-13 14:24:10 发布

阅读量259

点赞数

分类专栏：笔记

本文链接：https://blog.csdn.net/qq_43396279/article/details/105456172

版权

笔记专栏收录该内容

8 篇文章 0 订阅

订阅专栏

XGBoost有两种建模训练的方法：一种是使用xgboost原生接口（xgb.train），另一种是使用sklearn风格的接口（XGBClassifier / XGBRegressor）

分类问题

1、使用xgboost原生库进行训练

import xgboost as xgb
from sklearn.metrics import accuracy_score

# Train a binary classifier with the native XGBoost API and plot the
# feature importances. f_train / l_train / f_test / l_test are expected to
# be defined earlier (features and 0/1 labels for the train and test sets).
from matplotlib import pyplot  # required by pyplot.show() below
from xgboost import plot_importance  # feature-importance plot helper

# Wrap features and labels in XGBoost's native DMatrix container.
dtrain = xgb.DMatrix(f_train, label=l_train)
dtest = xgb.DMatrix(f_test, label=l_test)

# 'verbosity' replaces the removed 'silent' flag (1 = print warnings only).
param = {
    'max_depth': 2,
    'eta': 1,
    'verbosity': 1,
    'objective': 'binary:logistic',
}
num_round = 2
bst = xgb.train(param, dtrain, num_round)

# binary:logistic predicts probabilities; threshold at 0.5 to get 0/1 labels.
train_preds = bst.predict(dtrain)
train_predictions = [int(value > 0.5) for value in train_preds]

# Compare the thresholded predictions with the true labels via sklearn.
train_accuracy = accuracy_score(l_train, train_predictions)
print("Train Accuracy: %.2f%%" % (train_accuracy * 100.0))

# Draw the feature-importance chart for the trained booster and show it.
plot_importance(bst)
pyplot.show()

2、使用XGBClassifier进行训练

# 未设定早停止， 未进行矩阵变换
from xgboost import XGBClassifier
from sklearn.datasets import load_svmlight_file #用于直接读取svmlight文件形式， 否则就需要使用xgboost.DMatrix(文件名)来读取这种格式的文件
from sklearn.metrics import accuracy_score
from matplotlib import pyplot


# Train a binary classifier through the scikit-learn style wrapper, report
# train/test accuracy and plot the feature importances. f_train / l_train /
# f_test / l_test are expected to be defined earlier.
from xgboost import plot_importance  # feature-importance plot helper

num_round = 100
# verbosity=0 replaces the removed silent=True flag (suppress all messages).
bst1 = XGBClassifier(
    max_depth=2,
    learning_rate=1,
    n_estimators=num_round,
    verbosity=0,
    objective='binary:logistic',
)
bst1.fit(f_train, l_train)

# XGBClassifier.predict returns class labels directly, so no thresholding
# step is needed before computing accuracy.
train_preds = bst1.predict(f_train)
train_accuracy = accuracy_score(l_train, train_preds)
print("Train Accuracy: %.2f%%" % (train_accuracy * 100.0))

preds = bst1.predict(f_test)
test_accuracy = accuracy_score(l_test, preds)
print("Test Accuracy: %.2f%%" % (test_accuracy * 100.0))

# Draw the feature-importance chart for the fitted model and show it.
plot_importance(bst1)
pyplot.show()

当用XGBClassifier（或XGBRegressor）构建模型时，如果参数放在字典param里，传入时要在param前加**进行解包（例如XGBRegressor(**param)），不然会报错
参考：XGBRegressor 参数调优

回归问题

from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
import numpy as np
# 5-fold cross-validated RMSE of an XGBRegressor on the defaulted samples.
# `train`, `test` and `feature_columns` are expected to be defined earlier.
SEED = 111

# random_state only takes effect together with shuffle=True; recent
# scikit-learn versions raise ValueError if it is set while shuffle=False.
folds = KFold(n_splits=5, shuffle=True, random_state=SEED)
param = {
    'objective': 'reg:squarederror',
    'learning_rate': 0.22,
    'max_depth': 9,
    'min_child_weight': 1.5,
    'gamma': 3.12,
    'subsample': 1,
    'colsample_bytree': 0.87,
}
num_round = 20

# Keep only the defaulted samples (PD == 1).
train_Default = train[train.PD == 1]
test_Default = test[test.PD == 1]
X_train = train_Default[feature_columns]
y_train = train_Default[['EAD.']]
X_test = test_Default[feature_columns]
y_test = test_Default[['EAD.']]

rmse_Score = []
for train_index, test_index in folds.split(X_train, y_train):
    # Slice the training data into this fold's fit / validation parts.
    fold_xtrain, fold_ytrain = X_train.values[train_index], y_train.values[train_index]
    fold_xtest, fold_ytest = X_train.values[test_index], y_train.values[test_index]

    # The parameter dict must be unpacked with ** when building the model.
    bst = XGBRegressor(**param, n_estimators=num_round, random_state=SEED)
    bst.fit(fold_xtrain, fold_ytrain)
    preds = bst.predict(fold_xtest)

    # Root mean squared error on the held-out fold.
    rmse = np.sqrt(mean_squared_error(fold_ytest, preds))
    rmse_Score.append(rmse)

# Average the per-fold RMSE values and round to four decimals.
RMSE = round(np.mean(rmse_Score), 4)
print('RMSE = %.4f' % RMSE)