LightGBM是基于决策树的分布式梯度提升框架,以选取最大信息增益作为特征选择的目标。
它主要的参数有 【转自lightgbm参数说明】
关于lightgbm params的说明
task = train #配置目标是用于训练
boosting_type = gbdt # 训练方式
objective = binary #目标:二分类
metric = binary_logloss,auc #评估指标(可同时指定多个,逗号分隔)
metric_freq = 1 #度量输出的频度
is_training_metric = true #true代表需要输出训练数据的度量,又称training_metric, train_metric
max_bin = 255 #特征最大分割
data = /Users/shuubiasahi/Documents/githup/LightGBM/examples/binary_classification/binary.train #训练数据地址
num_trees = 100 #树的棵数
learning_rate = 0.1 #学习率
num_leaves = 63 #决策树的叶子节点数。
(LightGBM用决策树叶子节点数来确定树的复杂度,而XGBoost用max_depth确定树的复杂度)
其中,num_leaves的设置与数据集特征总数有关,num_leaves尽量小于完全二叉树的叶子节点数,否则容易过拟合。
tree_learner = serial
num_threads = 8 # 最大线程个数
feature_fraction = 0.8 # feature sub-sample, will random select 80% feature to train on each iteration, alias: sub_feature
bagging_freq = 5 # Support bagging (data sub-sample), will perform bagging every 5 iterations
bagging_fraction = 0.8 # Bagging fraction, will random select 80% data on bagging, alias: sub_row
min_data_in_leaf = 50 # minimal number data for one leaf, use this to deal with over-fit, alias : min_data_per_leaf, min_data
min_sum_hessian_in_leaf = 5.0 # minimal sum hessians for one leaf, use this to deal with over-fit
is_enable_sparse = true # save memory and faster speed for sparse feature, alias: is_sparse
use_two_round_loading = false # when data is bigger than memory size, set this to true. otherwise set false will have faster speed
# alias: two_round_loading, two_round
is_save_binary_file = false # true if need to save data to binary file and application will auto load data from binary file next time
# alias: is_save_binary, save_binary
#模型输出文件
output_model = /Users/shuubiasahi/Documents/githup/LightGBM/examples/binary_classification/LightGBM_model.txt
machine_list_file = /Users/shuubiasahi/Documents/githup/LightGBM/examples/binary_classification/mlist.txt