Python XGBoost Parameter Tuning

import pandas as pd
import xgboost as xgb
from sklearn import metrics
from xgboost import XGBClassifier

def modelfit(model, X_train, X_test, y_train, y_test, useTrainCV, eval_metric='auc', cv_folds=4,
             early_stopping_rounds=20):
    # early_stopping_rounds: stop if the eval metric has not improved for 20 consecutive rounds
    print('Model eval_metric is %s' % eval_metric)

    if useTrainCV:
        xgb_param = model.get_xgb_params()
        xgtrain = xgb.DMatrix(X_train, label=y_train)
        cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=model.get_params()['n_estimators'], nfold=cv_folds,
                          metrics=eval_metric, early_stopping_rounds=early_stopping_rounds)
        #cvresult.to_csv(cvresult_path, index_label='n_estimators')
        print("Best Iteration: %d" % cvresult.shape[0])
        model.set_params(n_estimators=cvresult.shape[0])

    # Fit the algorithm on the data
    model.fit(X_train, y_train, eval_metric=eval_metric)

    Report_Df = myscoring(model, X_train, y_train, X_test, y_test, threshold=0.5)
    print(Report_Df)
    return model
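
A minimal usage sketch for modelfit (the data split and base parameters here are hypothetical, not from the original post):

```python
# Hypothetical call; assumes X_train/X_test/y_train/y_test already exist
base_model = XGBClassifier(learning_rate=0.1, n_estimators=200,
                           objective='binary:logistic')
tuned_model = modelfit(base_model, X_train, X_test, y_train, y_test,
                       useTrainCV=True, eval_metric='auc')
```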

def myscoring(model, X_train, y_train, X_test, y_test, threshold=0.5):
    print("threshold %s" % threshold)
    # Predict training set:
    predprob_train = model.predict_proba(X_train)[:, 1]
    y_train_pred = (predprob_train >= threshold)*1
    #predictions_train = model.predict(X_train)

    # Predict test set:
    predprob_test = model.predict_proba(X_test)[:, 1]
    y_test_pred = (predprob_test >= threshold)*1
    #predictions_test = model.predict(X_test)

    # Print model report:
    # print ( "\nModel Report" )
    Report_Df = pd.DataFrame(
        data={'evaluating indicator': ['Accuracy', 'AUC Score', 'Recall', 'F1-score', 'Precision'],
              'Train': [metrics.accuracy_score(y_train, y_train_pred), metrics.roc_auc_score(y_train, predprob_train),
                        metrics.recall_score(y_train, y_train_pred), metrics.f1_score(y_train, y_train_pred),
                        metrics.precision_score(y_train, y_train_pred)],
              'Test': [metrics.accuracy_score(y_test, y_test_pred),
                       metrics.roc_auc_score(y_test, predprob_test),
                       metrics.recall_score(y_test, y_test_pred), metrics.f1_score(y_test, y_test_pred),
                       metrics.precision_score(y_test, y_test_pred)]}
    )

    # print(Report_Df[['evaluating indicator','Train','Test']])
    return Report_Df[['evaluating indicator', 'Train', 'Test']]
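
Because myscoring exposes threshold, a quick way to pick an operating point is to sweep it and compare the reports (a sketch; the threshold grid is arbitrary):

```python
# Hypothetical threshold sweep over an already-fitted model
for t in [0.3, 0.4, 0.5, 0.6]:
    print(myscoring(model, X_train, y_train, X_test, y_test, threshold=t))
```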

## Feature importance output
def features_importance(model, features):
    # Pair each feature name with its importance score, sort descending,
    # and keep only features with non-zero importance
    features_importance = []
    for name, importance in zip(features, model.feature_importances_):
        features_importance.append({"feature": name, "importance": importance})
    features_importance = pd.DataFrame(sorted(features_importance, key=lambda x: x["importance"], reverse=True))
    features_importance = features_importance[features_importance["importance"] > 0]
    return features_importance
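
Usage is just the fitted model plus the feature names (assuming X_train is a pandas DataFrame, so its columns are the feature names):

```python
imp_df = features_importance(model, X_train.columns.tolist())
print(imp_df.head(10))  # ten most important features
```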

## Given base parameters, use modelfit above to pick a suitable n_estimators,
## then tune the rest by grid search or Bayesian optimization
## (a Bayesian sketch follows after this function).
## A simple grid search:
from sklearn.model_selection import GridSearchCV

def Gcv(X_train, X_test, y_train, y_test,
        # scale_pos_weight is conventionally n_negative / n_positive for
        # an imbalanced target; the ratio below is the original author's
        model=XGBClassifier(scale_pos_weight=452/3573, objective='binary:logistic'),
        params={'learning_rate': [0.05, 0.1, 0.3],
                'n_estimators': [50, 60, 70],
                'max_depth': [3, 4, 5, 6],
                'subsample': [0.6, 0.75],
                'colsample_bytree': [0.5, 0.6, 0.7]}):
    
    # Optional preprocessing, kept from the original experiments:
    # X_train, X_test = X_train.reset_index(drop=True), X_test.reset_index(drop=True)
    # y_train, y_test = y_train.reset_index(drop=True), y_test.reset_index(drop=True)
    # X_train, X_test, y_train, y_test = X_train.fillna(0), X_test.fillna(0), y_train.fillna(0), y_test.fillna(0)
    # X_train, X_test, y_train, y_test = X_train.astype('float64'), X_test.astype('float64'), y_train.astype('float64'), y_test.astype('float64')
    
    # GridSearchCV defaults to the estimator's .score (accuracy for a
    # classifier); on an imbalanced target, scoring='roc_auc' is usually
    # more informative
    gsCv = GridSearchCV(model, param_grid=params)
    gsCv.fit(X_train, y_train)

    print(gsCv.best_score_)
    print(gsCv.best_params_)

    return gsCv.best_params_
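
The comment above also mentions Bayesian tuning, but only grid search is implemented. Below is a minimal Bayesian alternative using scikit-optimize's BayesSearchCV; the package choice and search ranges are assumptions, not part of the original post:

```python
from skopt import BayesSearchCV
from skopt.space import Integer, Real

def bayes_cv(X_train, y_train,
             model=XGBClassifier(objective='binary:logistic')):
    # Continuous/integer ranges in place of the fixed grid above
    search_spaces = {
        'learning_rate': Real(0.01, 0.3, prior='log-uniform'),
        'n_estimators': Integer(50, 200),
        'max_depth': Integer(3, 6),
        'subsample': Real(0.6, 0.9),
        'colsample_bytree': Real(0.5, 0.9),
    }
    opt = BayesSearchCV(model, search_spaces, n_iter=30, cv=4,
                        scoring='roc_auc', random_state=0)
    opt.fit(X_train, y_train)
    print(opt.best_score_)
    print(opt.best_params_)
    return opt.best_params_
```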

def model_paramadjust_train(X_train, y_train, X_test, y_test, class_weight=1, thred=0.5):
    params = Gcv(X_train, X_test, y_train, y_test)
    model = XGBClassifier(scale_pos_weight=class_weight, objective='binary:logistic')
    model.set_params(**params)
    model.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="auc",
              eval_set=[(X_test, y_test)])
    return model, myscoring(model, X_train, y_train, X_test, y_test, threshold=thred)
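
One caveat: the fit(..., early_stopping_rounds=..., eval_metric=...) form above matches older xgboost releases. In newer versions (1.6+, and required since 2.0) those arguments moved to the constructor; a sketch of the newer style, with illustrative values:

```python
# Newer xgboost (>= 1.6) style; parameter values are illustrative
model = XGBClassifier(objective='binary:logistic', eval_metric='auc',
                      early_stopping_rounds=10)
model.fit(X_train, y_train, eval_set=[(X_test, y_test)])
```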

## Train/test metric computation: covered by myscoring above

## Feature importance plotting: a minimal sketch follows
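
A plotting sketch built on the features_importance helper above (the top_n cutoff and figure size are assumptions; xgboost's built-in xgb.plot_importance(model) is an alternative):

```python
import matplotlib.pyplot as plt

def plot_features_importance(model, features, top_n=20):
    # Take the top_n features by importance and draw a horizontal bar chart
    imp_df = features_importance(model, features).head(top_n)
    plt.figure(figsize=(8, 6))
    plt.barh(imp_df['feature'][::-1], imp_df['importance'][::-1])
    plt.xlabel('importance')
    plt.title('XGBoost feature importance')
    plt.tight_layout()
    plt.show()
```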

XGBoost is a decision-tree-based machine learning algorithm that has performed well in many Kaggle competitions. Tuning an XGBoost model's parameters can significantly improve its accuracy. The following steps implement XGBoost parameter tuning in Python:

1. Import the required libraries and load the dataset:

```python
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score

# Load the data
data = pd.read_csv('data.csv')
```

2. Prepare the training and test sets:

```python
# Separate the features and the target
X = data.drop('label', axis=1)
y = data['label']

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
```

3. Define the parameter space:

```python
parameters = {
    'max_depth': [3, 4, 5],
    'learning_rate': [0.1, 0.01, 0.001],
    'n_estimators': [50, 100, 200],
    'min_child_weight': [1, 2, 3]
}
```

4. Instantiate the XGBoost classifier and the grid search:

```python
# Instantiate the XGBoost classifier
xgb_clf = xgb.XGBClassifier()

# Instantiate the grid search
grid_search = GridSearchCV(estimator=xgb_clf, param_grid=parameters, scoring='accuracy', cv=5)
```

5. Train the model and print the best parameters:

```python
# Train the model
grid_search.fit(X_train, y_train)

# Print the best parameters
print("Best parameters found: ", grid_search.best_params_)
```

6. Predict with the best parameters and report the accuracy:

```python
# Predict on the test set with the best parameters
y_pred = grid_search.predict(X_test)

# Report the accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
```

With these steps you can tune an XGBoost model in Python and improve its performance.