#构建模型
import lightgbm as lgb
from catboost import CatBoostClassifier
from sklearn.model_selection import StratifiedKFold,KFold
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler
import warnings
# Install a global "ignore" filter so that no warnings are shown from here on.
# NOTE(review): this also hides warnings that may point at real problems.
warnings.filterwarnings('ignore')
def cat_model(train_x, train_y, test_x, seed=512, cat_features=None):
    """Train CatBoost with 5-fold stratified CV and collect predictions.

    Args:
        train_x: Training features as a pandas DataFrame (``.iloc`` and
            ``.columns`` are used).
        train_y: Binary target aligned row-by-row with ``train_x``
            (a pandas Series or any 1-D array-like).
        test_x: Test features with the same columns as ``train_x``.
        seed: ``random_state`` of the ``StratifiedKFold`` splitter.
        cat_features: Column names passed to CatBoost as categorical
            features. Defaults to the six ``num_*_bin`` columns that were
            previously hard-coded.

    Returns:
        A tuple ``(oof, test_predict, feat_imp_df)``:
        * ``oof`` -- array of shape ``(n_train, 2)`` holding the
          ``predict_proba`` output for each row from the fold in which that
          row was in the validation split.
        * ``test_predict`` -- array of shape ``(n_test, 2)``; the mean of the
          per-fold ``predict_proba`` outputs on ``test_x``.
        * ``feat_imp_df`` -- DataFrame with columns ``feature``, ``imp``
          (importance averaged over folds) and ``rank`` (0 = most
          important), sorted by ``imp`` descending.
    """
    folds = 5
    if cat_features is None:
        cat_features = ['num_28_bin', 'num_6_bin', 'num_10_bin',
                        'num_37_bin', 'num_14_bin', 'num_21_bin']

    kf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=seed)
    oof = np.zeros([train_x.shape[0], 2])
    test_predict = np.zeros([test_x.shape[0], 2])
    auc_scores = []

    # Accumulate importances in the original column order. Sorting happens
    # once after the loop; sorting inside the loop would reorder the rows and
    # make the next fold's positional "+=" credit the wrong features.
    imp_sum = np.zeros(train_x.shape[1])

    # kf.split yields positional indices, so index the target positionally.
    # Plain ``series[int_array]`` is label-based and breaks on a non-default
    # index.
    y_values = np.asarray(train_y)

    # Fold-independent hyper-parameters. 'random_seed' used to appear twice in
    # this dict (2023, then 11); only the last value (11) ever took effect.
    cat_params = {
        'learning_rate': 0.1,
        'depth': 5,
        'bootstrap_type': 'Bernoulli',
        'od_type': 'Iter',
        'od_wait': 100,
        'random_seed': 11,
        'allow_writing_files': False,
        'loss_function': 'Logloss',
        'task_type': 'GPU',
    }

    for i, (train_index, valid_index) in enumerate(kf.split(train_x, y_values)):
        print("|-----------------------------------------|")
        print("| CAT Fold {} Training Start |".format(str(i + 1)))
        print("|-----------------------------------------|")

        trn_x, val_x = train_x.iloc[train_index], train_x.iloc[valid_index]
        trn_y, val_y = y_values[train_index], y_values[valid_index]

        # Train the model for this fold. The local is deliberately not named
        # ``cat_model`` so it does not shadow this function.
        model = CatBoostClassifier(iterations=2000, **cat_params)
        model.fit(trn_x, trn_y, eval_set=(val_x, val_y),
                  cat_features=cat_features, use_best_model=True, verbose=100)

        val_pred = model.predict_proba(val_x)
        test_pred = model.predict_proba(test_x)

        imp_sum += model.feature_importances_ / folds
        oof[valid_index] = val_pred
        test_predict += test_pred / kf.n_splits

        auc_score = roc_auc_score(np.array(val_y), np.array(val_pred[:, 1]))
        print(auc_score)
        auc_scores.append(auc_score)

    feat_imp_df = pd.DataFrame({'feature': train_x.columns, 'imp': imp_sum})
    feat_imp_df = feat_imp_df.sort_values(by='imp', ascending=False).reset_index(drop=True)
    feat_imp_df['rank'] = range(feat_imp_df.shape[0])

    print('AVG_auc :', sum(auc_scores) / len(auc_scores))
    return oof, test_predict, feat_imp_df
# Train the CatBoost model on the selected feature columns.
cat_oof, cat_test, cat_imp_df = cat_model(
    train_df[feature_cols],
    train_df['target'],
    test_df[feature_cols],
)
# Keep only column 1 of each two-column probability array.
cat_pre_train, cat_pre_test = cat_oof[:, 1], cat_test[:, 1]