XGBoost example

This post walks through a hands-on machine-learning example with the Python library XGBoost, covering the key steps of data preprocessing, model training, parameter tuning and model evaluation, to help readers understand how XGBoost works and how to apply it.
# _*_coding:utf-8 _*_
# @project:py_project
# @name:7372
# @date:2022/2/13 11:07
# @Author:Ly
import pandas as pd
import os
import numpy as np
import xgboost as xgb
from sklearn.metrics import recall_score, accuracy_score, roc_auc_score, f1_score, precision_score
from sklearn.model_selection import train_test_split
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 500)
data_info=pd.ExcelFile(r'C:\Users\Administrator\Desktop\7372-1.xlsx')
df=data_info.parse('Sheet1')
# print(df.head(10))
# print(len(df['map_id'].unique()))
# print(df.columns)
# df['new_index']=df.apply(lambda row :str(row['map_id'])+str(row['lable']),axis=1)
# for ind in df['new_index'].unique():
#     tmp=df.loc[df['new_index']==ind]
#     print(tmp.describe())
# df_mean=df.groupby(['map_id','lable']).mean()
# df_std=df.groupby(['map_id','lable']).std()
# print(df_std)
mapid=pd.get_dummies(df['map_id'])
mapid.rename(columns={i: 'mapid_' + str(i) for i in mapid.columns}, inplace=True)
data = df.drop(columns=['informat_accont_id', 'fintime', 'gameseq', 'rankinggame', 'kartid', 'map_id'])
data_new = pd.concat([mapid, data], axis=1)
x = data_new.loc[:, data_new.columns != "lable"]
fea_imp = x.columns
y = data_new.loc[:, 'lable']


x_train, x_valid, y_train, y_valid = train_test_split(x, y, random_state=10, test_size=0.1,stratify=y)
x_train, x_test, y_train, y_test = train_test_split(x_train, y_train, random_state=10, test_size=0.1,stratify=y_train)
y_train = y_train.values.reshape(-1, 1)
y_valid = y_valid.values.reshape(-1, 1)
y_test = y_test.values.reshape(-1, 1)
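# The two nested splits above keep roughly 81% of the rows for training,
# ~9% for the test set and 10% for the hold-out validation set.
# The reshape to column vectors is optional; DMatrix also accepts 1-D label arrays.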

dtrain = xgb.DMatrix(x_train, label=y_train, feature_names=fea_imp)
dvalid = xgb.DMatrix(x_valid, label=y_valid, feature_names=fea_imp)
dtest = xgb.DMatrix(x_test, label=y_test, feature_names=fea_imp)
num_round = 100
watchlist = [(dtrain, 'train'), (dvalid, 'eval')]

param = {'max_depth': 3,
         'learning_rate': 0.1,
         'objective': 'binary:logistic',  # the default objective of xgb.train differs from XGBClassifier, so set it explicitly
         'booster': 'gbtree',
         'gamma': 0,
         'min_child_weight': 1,
         'subsample': 1,
         'colsample_bytree': 1,
         'reg_alpha': 0,
         'reg_lambda': 1,
         'random_state': 2,
         'eval_metric':'auc'}
xg = xgb.train(param, dtrain, num_round, evals=watchlist, early_stopping_rounds=10)
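# With early_stopping_rounds set, the returned booster records the best round on the
# 'eval' set; a hedged check (attribute availability can depend on the xgboost version
# and on whether early stopping actually fired):
if hasattr(xg, 'best_iteration'):
    print('best iteration:', xg.best_iteration, 'best eval AUC:', xg.best_score)
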
# y_true and y_pred are 0/1 labels
y_prob_pred = xg.predict(dtest)
y_true = y_test

# AUC is computed from the predicted probabilities, so it does not depend on the threshold
xgb_auc = roc_auc_score(y_true, y_prob_pred)
print('xgb_auc on the test set------------>:', xgb_auc)

# Evaluate the thresholded 0/1 metrics at a couple of candidate thresholds
for threshold in (0.6, 0.65):
    y_pred = [1 if p > threshold else 0 for p in y_prob_pred]
    print(f'precision_score at threshold {threshold}---->:', precision_score(y_true, y_pred, average='macro'))
    print(f'recall_score at threshold {threshold}------->:', recall_score(y_true, y_pred, average='macro'))
    print(f'f1_score at threshold {threshold}----------->:', f1_score(y_true, y_pred, average='macro'))
    print(f'accuracy_score at threshold {threshold}----->:', accuracy_score(y_true, y_pred))
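
# Rather than hand-picking thresholds, the precision-recall curve can be scanned for the
# threshold that maximises F1 -- a minimal sketch using sklearn, not part of the original
# script (in practice this would be done on the validation set rather than the test set):
from sklearn.metrics import precision_recall_curve
prec, rec, thr = precision_recall_curve(y_test.ravel(), y_prob_pred)
f1_by_thr = 2 * prec[:-1] * rec[:-1] / (prec[:-1] + rec[:-1] + 1e-12)
print('threshold maximising F1 on the test set:', thr[np.argmax(f1_by_thr)])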

# Feature importance by gain and by weight (split count)
gain_score = xg.get_score(importance_type='gain')
weight_score = xg.get_score(importance_type='weight')
im_gain = pd.DataFrame({'importance': list(gain_score.values()), 'var': list(gain_score.keys())})
im_weight = pd.DataFrame({'importance': list(weight_score.values()), 'var': list(weight_score.keys())})

print(im_gain.sort_values(by='importance', ascending=False))
print(im_weight.sort_values(by='importance', ascending=False))
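
The introduction mentions parameter tuning, which the script above does not actually demonstrate. Below is a minimal, hedged sketch of how it could be done with xgb.cv on the same dtrain, comparing a few max_depth values; the variable names (cv_params, cv_results) and the choice of nfold=5 are illustrative and not part of the original script.

# Hypothetical tuning sketch: cross-validate a few max_depth values with xgb.cv.
for depth in (3, 5, 7):
    cv_params = dict(param, max_depth=depth)  # copy the base params and override max_depth
    cv_results = xgb.cv(cv_params, dtrain, num_boost_round=num_round, nfold=5,
                        metrics='auc', early_stopping_rounds=10, seed=2, stratified=True)
    print(f"max_depth={depth}: best mean CV AUC = {cv_results['test-auc-mean'].max():.4f}")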




