废话不多说,直接上代码
# Train an XGBoost binary classifier on train_xy.csv and print its AUC on a
# held-out split.
#
# Names used here but defined outside this snippet: `depth`, `weight`,
# `rounds` (hyper-parameters) and `xgb_test1` (a second DMatrix to score).

# Load the labelled data and hold out 40% of the rows for evaluation.
train = pd.read_csv('train_xy.csv')
traint, testst = train_test_split(train, test_size=0.4, random_state=1)

# Booster configuration.
params = {
    'booster': 'gbtree',
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    'gamma': 0,
    'max_depth': depth,  # original author's note: 4
    'lambda': 2,
    'subsample': 0.7,
    'colsample_bytree': 0.7,
    'min_child_weight': weight,  # original author's note: 6
    'silent': 1,
    'eta': 0.007,
    'seed': 1000,  # original author's note: "22 is very high"
    'nthread': 4,
}
plst = list(params.items())
num_rounds = rounds  # number of boosting iterations (original note: 1000)

# Split labels from features. `pop` returns the 'y' column and removes it
# from `testst` in place, so `testst` holds features only from here on.
target = testst.pop('y')
y = traint.y
X = traint.drop(['y'], axis=1)

xgb_train = xgb.DMatrix(X, label=y)
xgb_test = xgb.DMatrix(testst)

# Training runs for exactly `num_rounds` iterations: no watchlist and no
# early stopping are passed.
model = xgb.train(plst, xgb_train, num_rounds)
model.save_model('xgb.model')  # persist the trained model to disk

# Predict with the full ensemble. The former
# `ntree_limit=model.best_ntree_limit` argument was dropped: without early
# stopping it selects every tree anyway, and recent XGBoost releases no
# longer provide that argument/attribute, so the call would fail there.
preds = model.predict(xgb_test)
preds1 = model.predict(xgb_test1)
preds1 = np.round(preds1, 6)  # keep six decimal places

# Score the held-out split; roc_auc_score pairs labels and predictions by
# position.
preds = pd.Series(preds)
test_auc = metrics.roc_auc_score(target, preds)
print(test_auc)