import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error,make_scorer
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings('ignore')
# File paths for the Kaggle insurance ("baoxian") train/test CSVs.
train = r'C:\Users\10991\Desktop\kaggle\baoxian_train.csv'
test = r'C:\Users\10991\Desktop\kaggle\baoxian_test.csv'

train = pd.read_csv(train)
# The loss target is heavily right-skewed; model log(loss) instead and
# invert with np.exp when evaluating (see xg_eval_mae below).
train['log_loss'] = np.log(train['loss'])

# Feature columns: everything except the id and the raw/log targets.
features = [x for x in train.columns if x not in ['loss', 'id', 'log_loss']]
cat_features = [x for x in train.select_dtypes(include=['object']).columns
                if x not in ['loss', 'id', 'log_loss']]
num_features = [x for x in train.select_dtypes(exclude=['object']).columns
                if x not in ['loss', 'id', 'log_loss']]
print(len(cat_features))
print(len(num_features))

ntrain = train.shape[0]
# .copy() makes train_x an independent frame so the categorical encoding
# below mutates it reliably instead of raising SettingWithCopyWarning.
train_x = train[features].copy()
train_y = train['log_loss']

# Integer-encode each categorical column (ordinal codes via pandas category).
for col in cat_features:
    train_x[col] = train_x[col].astype('category').cat.codes
print(train_x.shape)
print(train_y.shape)  # fixed: original line was missing the closing parenthesis
def xg_eval_mae(yhat, dtrain):
    """Custom XGBoost eval metric: MAE on the original (un-logged) loss scale.

    The model is trained on log(loss) (see the preprocessing step that creates
    ``train['log_loss']``), so both predictions and labels are inverted with
    ``np.exp`` before computing the error, matching the competition metric.

    Parameters
    ----------
    yhat : np.ndarray
        Model predictions in log space.
    dtrain : xgb.DMatrix
        Evaluation data; ``get_label()`` returns the log-space labels.

    Returns
    -------
    (str, float)
        Metric name ``'mae'`` and the mean absolute error in original units.
    """
    y = dtrain.get_label()
    # NOTE(review): the original return line was lost to scrape garbling
    # ("return &#"); this restores the standard log-target MAE metric.
    return 'mae', float(np.mean(np.abs(np.exp(y) - np.exp(yhat))))
# XGBoost parameter tuning (original article title; CSDN scrape artifact)
# "Latest recommended article published 2024-04-06 11:01:26" (blog footer residue)