1. 原理
2. 调参
3. 基本用法
# -*- coding: utf-8 -*-
import os
import pandas as pd
import xgboost as xgb
# Basic XGBoost usage example: load the data, train a binary classifier (or
# reuse a previously saved model), then predict on the test set.

# File the trained booster is cached in between runs.
MODEL_PATH = "clf.model"
LABEL_COLUMN = "label"
# NOTE(review): "all_features" is carried over from the original notes and
# looks like a placeholder -- replace it with the real feature column names.
# The same columns must be used for training and for prediction.
FEATURE_COLUMNS = ["all_features"]
# Value that marks a missing entry in the data. This is an argument of
# xgb.DMatrix, not a booster parameter, so it does not belong in `params`.
MISSING_VALUE = -1

train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# Booster parameters only. Arguments of xgb.train (e.g. verbose_eval) and of
# xgb.DMatrix (e.g. missing) are passed to those calls directly below.
params = {
    'objective': 'binary:logistic',  # binary classification, outputs a probability in [0, 1]
    'eta': 0.1,  # learning rate
    'colsample_bytree': 0.8,  # random subsampling of features for each tree
    'min_child_weight': 2,
    'max_depth': 15,
    'subsample': 0.8,  # random subsampling of training rows
    'alpha': 10,  # L1 regularization
    'gamma': 30,  # minimum loss reduction required to make a split
    'lambda': 50,  # L2 regularization
    'nthread': 8,
    'eval_metric': 'auc',
    'scale_pos_weight': 15,  # extra weight given to the positive class
    'seed': 1,
}

# Train and save the model, unless a saved model already exists.
if os.path.exists(MODEL_PATH):
    clf = xgb.Booster(model_file=MODEL_PATH)
else:
    xgbTrain = xgb.DMatrix(
        train[FEATURE_COLUMNS],
        label=train[LABEL_COLUMN],
        missing=MISSING_VALUE,
    )
    # `params` must be the first argument of xgb.train; the number of boosting
    # rounds is left at the library default, as in the original notes.
    # verbose_eval only prints something when an evaluation set is supplied,
    # so the training set is registered under the name "train".
    clf = xgb.train(
        params,
        xgbTrain,
        evals=[(xgbTrain, "train")],
        verbose_eval=True,
    )
    clf.save_model(MODEL_PATH)

# Predict: build the test matrix from the same feature columns and the same
# missing-value marker that were used for training.
prediction = clf.predict(
    xgb.DMatrix(test[FEATURE_COLUMNS], missing=MISSING_VALUE)
)
参考文献
https://zhuanlan.zhihu.com/p/25308120
http://www.cnblogs.com/zhangbojiangfeng/p/6428988.html