1、LightGBM原生接口----分类模型
"""Train a LightGBM multiclass classifier on the iris dataset (native API).

Loads iris, makes an 80/20 train/test split, trains a GBDT model with
early stopping on the held-out set, and prints the test accuracy.
"""
import lightgbm as lgb
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np

iris = load_iris()
data = iris.data
target = iris.target
# Fixed random_state makes the split (and the printed accuracy) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    data, target, test_size=0.2, random_state=42
)

# Wrap the numpy arrays in LightGBM's native Dataset format.
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

# Training parameters, passed as a plain dict.
params = {
    'boosting_type': 'gbdt',    # boosting type
    'objective': 'multiclass',  # softmax multiclass objective
    'num_leaves': 31,           # max number of leaves per tree
    'learning_rate': 0.05,      # learning rate (shrinkage)
    'num_class': 3,             # number of classes (iris has 3)
    'feature_fraction': 0.9,    # fraction of features sampled per tree
    'bagging_fraction': 0.8,    # fraction of rows sampled per tree
    'bagging_freq': 5,          # perform bagging every 5 iterations
}

# Train. NOTE: the `early_stopping_rounds` keyword of lgb.train() was removed
# in LightGBM 4.0; the callbacks API is the supported way to early-stop.
gbm = lgb.train(
    params,
    lgb_train,
    num_boost_round=20,
    valid_sets=[lgb_eval],
    callbacks=[lgb.early_stopping(stopping_rounds=5)],
)

# predict() returns per-class probabilities with shape (n_samples, num_class);
# argmax over axis 1 turns them into hard class labels for accuracy_score.
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)
print(accuracy_score(y_test, np.argmax(y_pred, axis=1)))
[1] valid_0's multi_logloss: 1.03982
Training until validation scores don't improve for 5 rounds.
[2] valid_0's multi_logloss: 0.980791
[3] valid_0's multi_logloss: 0.927019
[4] valid_0's multi_logloss: 0.877524
[5] valid_0's multi_logloss: 0.830831
[6] valid_0's multi_logloss: 0.785194
[7] valid_0's multi_logloss: 0.743296
[8] valid_0's multi_logloss: 0.704658
[9] valid_0's multi_logloss: 0.667132
[10] valid_0's multi_logloss: 0.632111
[11] valid_0's multi_logloss: 0.598285
[12]