由于 LightGBM 的 predict_proba 输出结果的维度与样本数据集不一致,对 DataFrame 进行列赋值时会报错;可以通过修改下面代码中的 num_class 来验证。
# -*- coding: utf-8 -*-
"""
@Time  : 2022/7/29 14:48
@Author: Breeze
@File  : 多分类测试输出.py

Train a multiclass ``LGBMClassifier`` on uniformly random data and print the
shape of ``predict_proba`` on the validation set.  Change ``num_class`` to
experiment with the output shape.
"""
import os
import sys

import numpy as np
from lightgbm import LGBMClassifier, early_stopping, log_evaluation

print(sys.version)

# Default-constructed estimator; not used by the rest of this script.
clf_multiclass = LGBMClassifier()

# Hyper-parameters.
n_estimators = 100
lr = 0.01
max_depth = 3
l1 = 0.1
l2 = 0.1
subsample_for_bin = 32
min_child_sample = 32
num_class = 3  # passed to the model and used as the upper bound for labels

wd_namelist_model = LGBMClassifier(
    objective='multiclass',
    num_class=num_class,
    n_estimators=n_estimators,
    learning_rate=lr,
    num_leaves=2 ** max_depth - 1,  # must lie in (0, 2^max_depth - 1]
    colsample_bytree=0.6,
    subsample=0.6,
    max_depth=max_depth,
    reg_alpha=l1,
    reg_lambda=l2,
    subsample_for_bin=subsample_for_bin,
    subsample_freq=32,
    min_split_gain=0.01,
    min_child_weight=0.01,
    # Fixed: this keyword used to be spelled ``min_child_sample``, which is
    # not a LightGBM parameter, so the value was never applied.
    min_child_samples=min_child_sample,
    # ``silent`` was dropped: it is deprecated and ``verbose`` already
    # controls the log level.
    verbose=1,
    random_state=2022,
)

# Random features in [0, 1) and random integer labels in [0, num_class).
train_data = np.random.rand(50000, 100)  # 50000 samples x 100 features
train_label = np.random.randint(num_class, size=50000)
val_data = np.random.rand(5000, 100)  # 5000 samples x 100 features
val_label = np.random.randint(num_class, size=5000)

# Sample weights (sample_weight / eval_sample_weight) are not used.
# Early stopping and periodic logging are passed as callbacks because the
# ``early_stopping_rounds`` / ``verbose`` arguments of ``fit`` were removed
# in LightGBM 4.0; the callbacks require LightGBM >= 3.3.
wd_namelist_model.fit(
    train_data,
    train_label,
    eval_set=[(train_data, train_label), (val_data, val_label)],
    # Alternatives: 'auc', 'binary_logloss', 'multi_logloss', 'cross_entropy'.
    eval_metric=['auc_mu'],
    callbacks=[
        early_stopping(stopping_rounds=50),
        log_evaluation(period=10),
    ],
)

# Use ``predict`` instead to get class labels rather than probabilities.
val_pred = wd_namelist_model.predict_proba(val_data)
print(val_pred.shape)
如果想用多分类目标来训练二分类的目标变量,可以通过如下方法获取概率值:
# Take row 0 of the predict_proba output and keep its first len(val_data) entries.
# NOTE(review): presumably yields one probability per validation sample — confirm
# against the actual output shape for the LightGBM version in use.
wd_namelist_model.predict_proba(val_data)[0,:len(val_data)]