跑完之后反向上分了,还不太懂贝叶斯调参,先跑通了,明天再看
import pandas as pd
import numpy as np
import tsfresh as tsf
from tsfresh import extract_features, select_features
from tsfresh.utilities.dataframe_functions import impute
import lightgbm as lgb
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.metrics import log_loss,f1_score,make_scorer
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.preprocessing import OneHotEncoder
import os
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
data_train = pd.read_csv("train.csv")
data_test_A = pd.read_csv("testA.csv")
print(data_train.shape)
print(data_test_A.shape)
(100000, 3)
(20000, 2)
# 心跳信号行转列
train_heartbeat_df = data_train["heartbeat_signals"].str.split(",", expand=True).stack()
train_heartbeat_df = train_heartbeat_df.reset_index() # 重置索引
# 将level_0设为索引,可表示id,level_1是一个时间步特征
train_heartbeat_df = train_heartbeat_df.set_index("level_0")
train_heartbeat_df.index.name = None
train_heartbeat_df = train_heartbeat_df.rename(columns={"level_1":"time", 0:"heartbeat_signals"})
train_heartbeat_df["heartbeat_signals"] = train_heartbeat_df["heartbeat_signals"].astype(float)
train_heartbeat_df
time | heartbeat_signals | |
---|---|---|
0 | 0 | 0.991230 |
0 | 1 | 0.943533 |
0 | 2 | 0.764677 |
0 | 3 | 0.618571 |
0 | 4 | 0.379632 |
... | ... | ... |
99999 | 200 | 0.000000 |
99999 | 201 | 0.000000 |
99999 | 202 | 0.000000 |
99999 | 203 | 0.000000 |
99999 | 204 | 0.000000 |
20500000 rows × 2 columns
data_train_label = data_train["label"]
data_train = data_train.drop("label", axis=1)
data_train = data_train.drop("heartbeat_signals", axis=1) # 去掉原数据的heartbeat列
data_train = data_train.join(train_heartbeat_df)
data_train
id | time | heartbeat_signals | |
---|---|---|---|
0 | 0 | 0 | 0.991230 |
0 | 0 | 1 | 0.943533 |
0 | 0 | 2 | 0.764677 |
0 | 0 | 3 | 0.618571 |
0 | 0 | 4 | 0.379632 |
... | ... | ... | ... |
99999 | 99999 | 200 | 0.000000 |
99999 | 99999 | 201 | 0.000000 |
99999 | 99999 | 202 | 0.000000 |
99999 | 99999 | 203 | 0.000000 |
99999 | 99999 | 204 | 0.000000 |
20500000 rows × 3 columns
#测试集处理
# 心跳信号行转列
test_heartbeat_df = data_test_A["heartbeat_signals"].str.split(",", expand=True).stack()
test_heartbeat_df = test_heartbeat_df.reset_index() # 重置索引
# 将level_0设为索引,可表示id,level_1是一个时间步特征
test_heartbeat_df = test_heartbeat_df.set_index("level_0")
test_heartbeat_df.index.name = None
test_heartbeat_df = test_heartbeat_df.rename(columns={"level_1":"time", 0:"heartbeat_signals"})
test_heartbeat_df["heartbeat_signals"] = test_heartbeat_df["heartbeat_signals"].astype(float)
test_heartbeat_df
time | heartbeat_signals | |
---|---|---|
0 | 0 | 0.991571 |
0 | 1 | 1.000000 |
0 | 2 | 0.631816 |
0 | 3 | 0.136230 |
0 | 4 | 0.041420 |
... | ... | ... |
19999 | 200 | 0.000000 |
19999 | 201 | 0.000000 |
19999 | 202 | 0.000000 |
19999 | 203 | 0.000000 |
19999 | 204 | 0.000000 |
4100000 rows × 2 columns
data_test_A = data_test_A.drop("heartbeat_signals", axis=1) # 去掉原数据的heartbeat列
data_test_A = data_test_A.join(test_heartbeat_df)
data_test_A
id | time | heartbeat_signals | |
---|---|---|---|
0 | 100000 | 0 | 0.991571 |
0 | 100000 | 1 | 1.000000 |
0 | 100000 | 2 | 0.631816 |
0 | 100000 | 3 | 0.136230 |
0 | 100000 | 4 | 0.041420 |
... | ... | ... | ... |
19999 | 119999 | 200 | 0.000000 |
19999 | 119999 | 201 | 0.000000 |
19999 | 119999 | 202 | 0.000000 |
19999 | 119999 | 203 | 0.000000 |
19999 | 119999 | 204 | 0.000000 |
4100000 rows × 3 columns
train_features = pd.read_csv("train_features.csv")
test_features = pd.read_csv("test_features.csv")
# 按照特征和数据label之间的相关性进行特征选择
train_features_filtered = select_features(train_features, data_train_label)
train_features_filtered
heartbeat_signals__sum_values | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_38 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_37 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_36 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_35 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_34 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_33 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_32 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_31 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_30 | ... | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_84 | heartbeat_signals__fft_coefficient__attr_"imag"__coeff_97 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_90 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_94 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_92 | heartbeat_signals__fft_coefficient__attr_"real"__coeff_97 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_75 | heartbeat_signals__fft_coefficient__attr_"real"__coeff_88 | heartbeat_signals__fft_coefficient__attr_"real"__coeff_92 | heartbeat_signals__fft_coefficient__attr_"real"__coeff_83 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 38.927945 | 0.660949 | 1.090709 | 0.848728 | 1.168685 | 0.982133 | 1.223496 | 1.236300 | 1.104172 | 1.497129 | ... | 0.531883 | -0.047438 | 0.554370 | 0.307586 | 0.564596 | 0.562960 | 0.591859 | 0.504124 | 0.528450 | 0.473568 |
1 | 19.445634 | 1.718217 | 1.280923 | 1.850706 | 1.460752 | 1.924501 | 1.925485 | 1.715938 | 2.079957 | 1.818636 | ... | 0.563590 | -0.109579 | 0.697446 | 0.398073 | 0.640969 | 0.270192 | 0.224925 | 0.645082 | 0.635135 | 0.297325 |
2 | 21.192974 | 1.814281 | 1.619051 | 1.215343 | 1.787166 | 2.146987 | 1.686190 | 1.540137 | 2.291031 | 2.403422 | ... | 0.712487 | -0.074042 | 0.321703 | 0.390386 | 0.716929 | 0.316524 | 0.422077 | 0.722742 | 0.680590 | 0.383754 |
3 | 42.113066 | 2.109550 | 0.619634 | 2.366413 | 2.071539 | 1.000340 | 2.728281 | 1.391727 | 2.017176 | 2.610492 | ... | 0.601499 | -0.184248 | 0.564669 | 0.623353 | 0.466980 | 0.651774 | 0.308915 | 0.550097 | 0.466904 | 0.494024 |
4 | 69.756786 | 0.194549 | 0.348882 | 0.092119 | 0.653924 | 0.231422 | 1.080003 | 0.711244 | 1.357904 | 1.237998 | ... | 0.015292 | 0.070505 | 0.065835 | 0.051780 | 0.092940 | 0.103773 | 0.179405 | -0.089611 | 0.091841 | 0.056867 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
99995 | 63.323449 | 0.840651 | 1.186210 | 1.396236 | 0.417221 | 2.036034 | 1.659054 | 0.500584 | 1.693545 | 0.859932 | ... | 0.779955 | 0.005525 | 0.486013 | 0.273372 | 0.705386 | 0.602898 | 0.447929 | 0.474844 | 0.564266 | 0.133969 |
99996 | 69.657534 | 1.557787 | 1.393960 | 0.989147 | 1.611333 | 1.793044 | 1.092325 | 0.507138 | 1.763940 | 2.677643 | ... | 0.539489 | 0.114670 | 0.579498 | 0.417226 | 0.270110 | 0.556596 | 0.703258 | 0.462312 | 0.269719 | 0.539236 |
99997 | 40.897057 | 0.469758 | 1.000355 | 0.706395 | 1.190514 | 0.674603 | 1.632769 | 0.229008 | 2.027802 | 0.302457 | ... | 0.282597 | -0.474629 | 0.460647 | 0.478341 | 0.527891 | 0.904111 | 0.728529 | 0.178410 | 0.500813 | 0.773985 |
99998 | 42.333303 | 0.992948 | 1.354894 | 2.238589 | 1.237608 | 1.325212 | 2.785515 | 1.918571 | 0.814167 | 2.613950 | ... | 0.594252 | -0.162106 | 0.694276 | 0.681025 | 0.357196 | 0.498088 | 0.433297 | 0.406154 | 0.324771 | 0.340727 |
99999 | 53.290117 | 1.624625 | 1.739088 | 2.936555 | 0.154759 | 2.921164 | 2.183932 | 1.485150 | 2.685922 | 0.583443 | ... | 0.463697 | 0.289364 | 0.285321 | 0.422103 | 0.692009 | 0.276236 | 0.245780 | 0.269519 | 0.681719 | -0.053993 |
100000 rows × 708 columns
test_features= test_features.loc[:,list(train_features_filtered.columns)]
test_features
heartbeat_signals__sum_values | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_38 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_37 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_36 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_35 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_34 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_33 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_32 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_31 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_30 | ... | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_84 | heartbeat_signals__fft_coefficient__attr_"imag"__coeff_97 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_90 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_94 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_92 | heartbeat_signals__fft_coefficient__attr_"real"__coeff_97 | heartbeat_signals__fft_coefficient__attr_"abs"__coeff_75 | heartbeat_signals__fft_coefficient__attr_"real"__coeff_88 | heartbeat_signals__fft_coefficient__attr_"real"__coeff_92 | heartbeat_signals__fft_coefficient__attr_"real"__coeff_83 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 19.229863 | 2.381214 | 0.832151 | 2.509869 | 1.082112 | 2.517858 | 1.656104 | 2.257162 | 2.213421 | 1.815374 | ... | 0.563470 | -0.040576 | 0.485441 | 0.472059 | 0.448018 | 0.449347 | 0.479950 | 0.480448 | 0.442279 | 0.355992 |
1 | 84.298932 | 0.987660 | 0.856174 | 0.616261 | 0.293339 | 0.191558 | 0.528684 | 1.010080 | 1.478182 | 1.713876 | ... | 0.037307 | 0.010074 | 0.272897 | 0.247538 | 0.286948 | 0.143829 | 0.189416 | 0.124293 | 0.154624 | 0.077530 |
2 | 47.789921 | 0.696393 | 1.165387 | 1.004378 | 0.951231 | 1.542114 | 0.946219 | 1.673430 | 1.445220 | 1.118439 | ... | 0.738423 | -0.159505 | 0.418298 | 0.566628 | 0.849684 | 0.950851 | 0.779324 | 0.439255 | 0.839315 | 0.454957 |
3 | 47.069011 | 3.137668 | 0.044897 | 3.392946 | 3.054217 | 0.726293 | 3.582653 | 2.414946 | 1.257669 | 3.188068 | ... | 0.273142 | 0.366949 | 0.891690 | 0.214585 | 0.927562 | 0.648872 | 0.730178 | 0.606528 | 0.830105 | 0.662320 |
4 | 24.899397 | 0.496010 | 1.401020 | 0.536501 | 1.712592 | 1.044629 | 1.533405 | 1.330258 | 1.251771 | 1.441028 | ... | 0.644046 | -0.129700 | 0.578560 | 0.783258 | 0.480598 | 0.485003 | 0.667111 | 0.594234 | 0.447980 | 0.511133 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
19995 | 43.175130 | 1.776937 | 0.211527 | 1.986940 | 0.393550 | 1.693620 | 1.139395 | 1.459990 | 1.734535 | 1.025180 | ... | 0.546742 | -0.060254 | 0.507950 | 0.560192 | 0.541534 | 0.249750 | 0.608796 | 0.455444 | 0.535306 | 0.268471 |
19996 | 31.030782 | 1.451045 | 2.483726 | 1.105440 | 1.979721 | 2.821799 | 0.475276 | 2.782573 | 2.827882 | 0.520034 | ... | 0.491662 | 0.016413 | 0.480380 | 0.459172 | 0.363756 | 0.427028 | 0.544692 | 0.754834 | 0.361866 | 0.536087 |
19997 | 31.648623 | 2.141301 | 0.546706 | 2.340499 | 1.362651 | 1.942634 | 2.043679 | 0.994065 | 2.248144 | 1.007128 | ... | 0.529880 | 0.001012 | 0.768960 | 0.834159 | 0.672114 | 0.520215 | 0.341519 | 0.713419 | 0.664354 | 0.370047 |
19998 | 19.305442 | 0.221708 | 2.355288 | 1.051282 | 1.742370 | 2.164058 | 0.435583 | 2.649994 | 1.190594 | 2.328580 | ... | 0.527500 | -0.103574 | 0.521222 | 0.426435 | 0.636887 | 0.446365 | 0.551442 | 0.503703 | 0.635246 | 0.258394 |
19999 | 35.204569 | 0.827017 | 0.492990 | 1.627089 | 1.106799 | 0.639821 | 1.350155 | 0.533904 | 1.332401 | 1.229578 | ... | 0.248776 | 0.091218 | 0.659750 | 0.636282 | 0.319922 | 0.472824 | 0.355830 | 0.346311 | 0.312797 | 0.540855 |
20000 rows × 708 columns
#重命名,否则会报错
test_features.columns = range(test_features.shape[1])
train_features_filtered.columns = range(train_features_filtered.shape[1])
train_features_filtered
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 698 | 699 | 700 | 701 | 702 | 703 | 704 | 705 | 706 | 707 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 38.927945 | 0.660949 | 1.090709 | 0.848728 | 1.168685 | 0.982133 | 1.223496 | 1.236300 | 1.104172 | 1.497129 | ... | 0.531883 | -0.047438 | 0.554370 | 0.307586 | 0.564596 | 0.562960 | 0.591859 | 0.504124 | 0.528450 | 0.473568 |
1 | 19.445634 | 1.718217 | 1.280923 | 1.850706 | 1.460752 | 1.924501 | 1.925485 | 1.715938 | 2.079957 | 1.818636 | ... | 0.563590 | -0.109579 | 0.697446 | 0.398073 | 0.640969 | 0.270192 | 0.224925 | 0.645082 | 0.635135 | 0.297325 |
2 | 21.192974 | 1.814281 | 1.619051 | 1.215343 | 1.787166 | 2.146987 | 1.686190 | 1.540137 | 2.291031 | 2.403422 | ... | 0.712487 | -0.074042 | 0.321703 | 0.390386 | 0.716929 | 0.316524 | 0.422077 | 0.722742 | 0.680590 | 0.383754 |
3 | 42.113066 | 2.109550 | 0.619634 | 2.366413 | 2.071539 | 1.000340 | 2.728281 | 1.391727 | 2.017176 | 2.610492 | ... | 0.601499 | -0.184248 | 0.564669 | 0.623353 | 0.466980 | 0.651774 | 0.308915 | 0.550097 | 0.466904 | 0.494024 |
4 | 69.756786 | 0.194549 | 0.348882 | 0.092119 | 0.653924 | 0.231422 | 1.080003 | 0.711244 | 1.357904 | 1.237998 | ... | 0.015292 | 0.070505 | 0.065835 | 0.051780 | 0.092940 | 0.103773 | 0.179405 | -0.089611 | 0.091841 | 0.056867 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
99995 | 63.323449 | 0.840651 | 1.186210 | 1.396236 | 0.417221 | 2.036034 | 1.659054 | 0.500584 | 1.693545 | 0.859932 | ... | 0.779955 | 0.005525 | 0.486013 | 0.273372 | 0.705386 | 0.602898 | 0.447929 | 0.474844 | 0.564266 | 0.133969 |
99996 | 69.657534 | 1.557787 | 1.393960 | 0.989147 | 1.611333 | 1.793044 | 1.092325 | 0.507138 | 1.763940 | 2.677643 | ... | 0.539489 | 0.114670 | 0.579498 | 0.417226 | 0.270110 | 0.556596 | 0.703258 | 0.462312 | 0.269719 | 0.539236 |
99997 | 40.897057 | 0.469758 | 1.000355 | 0.706395 | 1.190514 | 0.674603 | 1.632769 | 0.229008 | 2.027802 | 0.302457 | ... | 0.282597 | -0.474629 | 0.460647 | 0.478341 | 0.527891 | 0.904111 | 0.728529 | 0.178410 | 0.500813 | 0.773985 |
99998 | 42.333303 | 0.992948 | 1.354894 | 2.238589 | 1.237608 | 1.325212 | 2.785515 | 1.918571 | 0.814167 | 2.613950 | ... | 0.594252 | -0.162106 | 0.694276 | 0.681025 | 0.357196 | 0.498088 | 0.433297 | 0.406154 | 0.324771 | 0.340727 |
99999 | 53.290117 | 1.624625 | 1.739088 | 2.936555 | 0.154759 | 2.921164 | 2.183932 | 1.485150 | 2.685922 | 0.583443 | ... | 0.463697 | 0.289364 | 0.285321 | 0.422103 | 0.692009 | 0.276236 | 0.245780 | 0.269519 | 0.681719 | -0.053993 |
100000 rows × 708 columns
def f1_score_vali(preds, data_vali):
labels = data_vali.get_label()
preds = np.argmax(preds.reshape(4, -1), axis=0)
score_vali = f1_score(y_true=labels, y_pred=preds, average='macro')
return 'f1_score', score_vali, True
# 5折交叉验证
folds = 5
seed = 2021
kf = KFold(n_splits=folds, shuffle=True, random_state=seed)
"""对训练集数据进行划分,分成训练集和验证集,并进行相应的操作"""
from sklearn.model_selection import train_test_split
import lightgbm as lgb
# 数据集划分
X_train_split, X_val, y_train_split, y_val = train_test_split(train_features_filtered, data_train_label, test_size=0.2)
train_matrix = lgb.Dataset(X_train_split, label=y_train_split)
valid_matrix = lgb.Dataset(X_val, label=y_val)
params = {
"learning_rate": 0.1,
"boosting": 'gbdt',
"lambda_l2": 0.1,
"max_depth": -1,
"num_leaves": 128,
"bagging_fraction": 0.8,
"feature_fraction": 0.8,
"metric": None,
"objective": "multiclass",
"num_class": 4,
"nthread": 10,
"verbose": -1,
}
"""使用训练集数据进行模型训练"""
model = lgb.train(params,
train_set=train_matrix,
valid_sets=valid_matrix,
num_boost_round=2000,
verbose_eval=50,
early_stopping_rounds=200,
feval=f1_score_vali)
Training until validation scores don't improve for 200 rounds
[50] valid_0's multi_logloss: 0.0428144 valid_0's f1_score: 0.963841
[100] valid_0's multi_logloss: 0.0460252 valid_0's f1_score: 0.968245
[150] valid_0's multi_logloss: 0.0500013 valid_0's f1_score: 0.968559
[200] valid_0's multi_logloss: 0.052647 valid_0's f1_score: 0.969163
[250] valid_0's multi_logloss: 0.0543222 valid_0's f1_score: 0.969561
Early stopping, best iteration is:
[57] valid_0's multi_logloss: 0.0426016 valid_0's f1_score: 0.964341
val_pre_lgb = model.predict(X_val, num_iteration=model.best_iteration)
preds = np.argmax(val_pre_lgb, axis=1)
score = f1_score(y_true=y_val, y_pred=preds, average='macro')
print('未调参前lightgbm单模型在验证集上的f1:{}'.format(score))
未调参前lightgbm单模型在验证集上的f1:0.964340591815784
"""使用lightgbm 5折交叉验证进行建模预测"""
cv_scores = []
for i, (train_index, valid_index) in enumerate(kf.split(train_features_filtered, data_train_label)):
print('************************************ {} ************************************'.format(str(i+1)))
X_train_split, y_train_split, X_val, y_val = train_features_filtered.iloc[train_index], data_train_label[train_index], train_features_filtered.iloc[valid_index], data_train_label[valid_index]
train_matrix = lgb.Dataset(X_train_split, label=y_train_split)
valid_matrix = lgb.Dataset(X_val, label=y_val)
params = {
"learning_rate": 0.1,
"boosting": 'gbdt',
"lambda_l2": 0.1,
"max_depth": -1,
"num_leaves": 128,
"bagging_fraction": 0.8,
"feature_fraction": 0.8,
"metric": None,
"objective": "multiclass",
"num_class": 4,
"nthread": 10,
"verbose": -1,
}
model = lgb.train(params,
train_set=train_matrix,
valid_sets=valid_matrix,
num_boost_round=2000,
verbose_eval=100,
early_stopping_rounds=200,
feval=f1_score_vali)
val_pred = model.predict(X_val, num_iteration=model.best_iteration)
val_pred = np.argmax(val_pred, axis=1)
cv_scores.append(f1_score(y_true=y_val, y_pred=val_pred, average='macro'))
print(cv_scores)
print("lgb_scotrainre_list:{}".format(cv_scores))
print("lgb_score_mean:{}".format(np.mean(cv_scores)))
print("lgb_score_std:{}".format(np.std(cv_scores)))
************************************ 1 ************************************
Training until validation scores don't improve for 200 rounds
[100] valid_0's multi_logloss: 0.044439 valid_0's f1_score: 0.96688
[200] valid_0's multi_logloss: 0.0505908 valid_0's f1_score: 0.967785
Early stopping, best iteration is:
[64] valid_0's multi_logloss: 0.0415553 valid_0's f1_score: 0.964996
[0.9649962046497507]
************************************ 2 ************************************
Training until validation scores don't improve for 200 rounds
[100] valid_0's multi_logloss: 0.0475183 valid_0's f1_score: 0.969292
[200] valid_0's multi_logloss: 0.0546766 valid_0's f1_score: 0.968665
Early stopping, best iteration is:
[54] valid_0's multi_logloss: 0.0430632 valid_0's f1_score: 0.967664
[0.9649962046497507, 0.9676635779679497]
************************************ 3 ************************************
Training until validation scores don't improve for 200 rounds
[100] valid_0's multi_logloss: 0.0405601 valid_0's f1_score: 0.968579
[200] valid_0's multi_logloss: 0.0465485 valid_0's f1_score: 0.969218
Early stopping, best iteration is:
[58] valid_0's multi_logloss: 0.0376067 valid_0's f1_score: 0.96681
[0.9649962046497507, 0.9676635779679497, 0.9668097503210689]
************************************ 4 ************************************
Training until validation scores don't improve for 200 rounds
[100] valid_0's multi_logloss: 0.0499528 valid_0's f1_score: 0.966924
[200] valid_0's multi_logloss: 0.0576756 valid_0's f1_score: 0.967212
Early stopping, best iteration is:
[53] valid_0's multi_logloss: 0.045262 valid_0's f1_score: 0.963817
[0.9649962046497507, 0.9676635779679497, 0.9668097503210689, 0.9638170246056651]
************************************ 5 ************************************
Training until validation scores don't improve for 200 rounds
[100] valid_0's multi_logloss: 0.0416016 valid_0's f1_score: 0.968388
[200] valid_0's multi_logloss: 0.0471191 valid_0's f1_score: 0.9692
Early stopping, best iteration is:
[55] valid_0's multi_logloss: 0.0386577 valid_0's f1_score: 0.966255
[0.9649962046497507, 0.9676635779679497, 0.9668097503210689, 0.9638170246056651, 0.9662553812483348]
lgb_scotrainre_list:[0.9649962046497507, 0.9676635779679497, 0.9668097503210689, 0.9638170246056651, 0.9662553812483348]
lgb_score_mean:0.9659083877585539
lgb_score_std:0.0013578976016960567
"""定义优化函数"""
def rf_cv_lgb(num_leaves, max_depth, bagging_fraction, feature_fraction, bagging_freq, min_data_in_leaf,
min_child_weight, min_split_gain, reg_lambda, reg_alpha):
# 建立模型
model_lgb = lgb.LGBMClassifier(boosting_type='gbdt', objective='multiclass', num_class=4,
learning_rate=0.1, n_estimators=5000,
num_leaves=int(num_leaves), max_depth=int(max_depth),
bagging_fraction=round(bagging_fraction, 2), feature_fraction=round(feature_fraction, 2),
bagging_freq=int(bagging_freq), min_data_in_leaf=int(min_data_in_leaf),
min_child_weight=min_child_weight, min_split_gain=min_split_gain,
reg_lambda=reg_lambda, reg_alpha=reg_alpha,
n_jobs= 8
)
f1 = make_scorer(f1_score, average='micro')
val = cross_val_score(model_lgb, train_features_filtered, data_train_label, cv=5, scoring=f1).mean()
return val
from bayes_opt import BayesianOptimization
"""定义优化参数"""
bayes_lgb = BayesianOptimization(
rf_cv_lgb,
{
'num_leaves':(10, 200),
'max_depth':(3, 20),
'bagging_fraction':(0.5, 1.0),
'feature_fraction':(0.5, 1.0),
'bagging_freq':(0, 100),
'min_data_in_leaf':(10,100),
'min_child_weight':(0, 10),
'min_split_gain':(0.0, 1.0),
'reg_alpha':(0.0, 10),
'reg_lambda':(0.0, 10),
}
)
"""开始优化"""
bayes_lgb.maximize(n_iter=10)
| iter | target | baggin... | baggin... | featur... | max_depth | min_ch... | min_da... | min_sp... | num_le... | reg_alpha | reg_la... |
-------------------------------------------------------------------------------------------------------------------------------------------------
[LightGBM] [Warning] feature_fraction is set=0.59, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.59
[LightGBM] [Warning] min_data_in_leaf is set=15, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=15
[LightGBM] [Warning] bagging_fraction is set=0.67, subsample=1.0 will be ignored. Current value: bagging_fraction=0.67
[LightGBM] [Warning] bagging_freq is set=60, subsample_freq=0 will be ignored. Current value: bagging_freq=60
[LightGBM] [Warning] feature_fraction is set=0.59, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.59
[LightGBM] [Warning] min_data_in_leaf is set=15, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=15
[LightGBM] [Warning] bagging_fraction is set=0.67, subsample=1.0 will be ignored. Current value: bagging_fraction=0.67
[LightGBM] [Warning] bagging_freq is set=60, subsample_freq=0 will be ignored. Current value: bagging_freq=60
[LightGBM] [Warning] feature_fraction is set=0.59, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.59
[LightGBM] [Warning] min_data_in_leaf is set=15, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=15
[LightGBM] [Warning] bagging_fraction is set=0.67, subsample=1.0 will be ignored. Current value: bagging_fraction=0.67
[LightGBM] [Warning] bagging_freq is set=60, subsample_freq=0 will be ignored. Current value: bagging_freq=60
[LightGBM] [Warning] feature_fraction is set=0.59, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.59
[LightGBM] [Warning] min_data_in_leaf is set=15, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=15
[LightGBM] [Warning] bagging_fraction is set=0.67, subsample=1.0 will be ignored. Current value: bagging_fraction=0.67
[LightGBM] [Warning] bagging_freq is set=60, subsample_freq=0 will be ignored. Current value: bagging_freq=60
[LightGBM] [Warning] feature_fraction is set=0.59, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.59
[LightGBM] [Warning] min_data_in_leaf is set=15, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=15
[LightGBM] [Warning] bagging_fraction is set=0.67, subsample=1.0 will be ignored. Current value: bagging_fraction=0.67
[LightGBM] [Warning] bagging_freq is set=60, subsample_freq=0 will be ignored. Current value: bagging_freq=60
| [0m 1 [0m | [0m 0.9868 [0m | [0m 0.6664 [0m | [0m 60.62 [0m | [0m 0.5947 [0m | [0m 4.432 [0m | [0m 0.518 [0m | [0m 15.69 [0m | [0m 0.0146 [0m | [0m 173.1 [0m | [0m 9.551 [0m | [0m 5.358 [0m |
[LightGBM] [Warning] feature_fraction is set=0.98, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.98
[LightGBM] [Warning] min_data_in_leaf is set=13, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=13
[LightGBM] [Warning] bagging_fraction is set=0.83, subsample=1.0 will be ignored. Current value: bagging_fraction=0.83
[LightGBM] [Warning] bagging_freq is set=25, subsample_freq=0 will be ignored. Current value: bagging_freq=25
[LightGBM] [Warning] feature_fraction is set=0.98, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.98
[LightGBM] [Warning] min_data_in_leaf is set=13, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=13
[LightGBM] [Warning] bagging_fraction is set=0.83, subsample=1.0 will be ignored. Current value: bagging_fraction=0.83
[LightGBM] [Warning] bagging_freq is set=25, subsample_freq=0 will be ignored. Current value: bagging_freq=25
[LightGBM] [Warning] feature_fraction is set=0.98, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.98
[LightGBM] [Warning] min_data_in_leaf is set=13, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=13
[LightGBM] [Warning] bagging_fraction is set=0.83, subsample=1.0 will be ignored. Current value: bagging_fraction=0.83
[LightGBM] [Warning] bagging_freq is set=25, subsample_freq=0 will be ignored. Current value: bagging_freq=25
[LightGBM] [Warning] feature_fraction is set=0.98, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.98
[LightGBM] [Warning] min_data_in_leaf is set=13, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=13
[LightGBM] [Warning] bagging_fraction is set=0.83, subsample=1.0 will be ignored. Current value: bagging_fraction=0.83
[LightGBM] [Warning] bagging_freq is set=25, subsample_freq=0 will be ignored. Current value: bagging_freq=25
[LightGBM] [Warning] feature_fraction is set=0.98, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.98
[LightGBM] [Warning] min_data_in_leaf is set=13, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=13
[LightGBM] [Warning] bagging_fraction is set=0.83, subsample=1.0 will be ignored. Current value: bagging_fraction=0.83
[LightGBM] [Warning] bagging_freq is set=25, subsample_freq=0 will be ignored. Current value: bagging_freq=25
| [95m 2 [0m | [95m 0.9876 [0m | [95m 0.8343 [0m | [95m 25.57 [0m | [95m 0.9777 [0m | [95m 14.13 [0m | [95m 2.184 [0m | [95m 13.44 [0m | [95m 0.9095 [0m | [95m 58.58 [0m | [95m 0.4381 [0m | [95m 0.8606 [0m |
[LightGBM] [Warning] feature_fraction is set=0.65, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.65
[LightGBM] [Warning] min_data_in_leaf is set=73, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=73
[LightGBM] [Warning] bagging_fraction is set=0.84, subsample=1.0 will be ignored. Current value: bagging_fraction=0.84
[LightGBM] [Warning] bagging_freq is set=10, subsample_freq=0 will be ignored. Current value: bagging_freq=10
[LightGBM] [Warning] feature_fraction is set=0.65, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.65
[LightGBM] [Warning] min_data_in_leaf is set=73, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=73
[LightGBM] [Warning] bagging_fraction is set=0.84, subsample=1.0 will be ignored. Current value: bagging_fraction=0.84
[LightGBM] [Warning] bagging_freq is set=10, subsample_freq=0 will be ignored. Current value: bagging_freq=10
[LightGBM] [Warning] feature_fraction is set=0.65, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.65
[LightGBM] [Warning] min_data_in_leaf is set=73, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=73
[LightGBM] [Warning] bagging_fraction is set=0.84, subsample=1.0 will be ignored. Current value: bagging_fraction=0.84
[LightGBM] [Warning] bagging_freq is set=10, subsample_freq=0 will be ignored. Current value: bagging_freq=10
[LightGBM] [Warning] feature_fraction is set=0.65, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.65
[LightGBM] [Warning] min_data_in_leaf is set=73, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=73
[LightGBM] [Warning] bagging_fraction is set=0.84, subsample=1.0 will be ignored. Current value: bagging_fraction=0.84
[LightGBM] [Warning] bagging_freq is set=10, subsample_freq=0 will be ignored. Current value: bagging_freq=10
[LightGBM] [Warning] feature_fraction is set=0.65, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.65
[LightGBM] [Warning] min_data_in_leaf is set=73, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=73
[LightGBM] [Warning] bagging_fraction is set=0.84, subsample=1.0 will be ignored. Current value: bagging_fraction=0.84
[LightGBM] [Warning] bagging_freq is set=10, subsample_freq=0 will be ignored. Current value: bagging_freq=10
| [0m 3 [0m | [0m 0.9868 [0m | [0m 0.8395 [0m | [0m 10.14 [0m | [0m 0.6496 [0m | [0m 5.15 [0m | [0m 8.197 [0m | [0m 73.63 [0m | [0m 0.3264 [0m | [0m 171.7 [0m | [0m 6.567 [0m | [0m 9.195 [0m |
[LightGBM] [Warning] feature_fraction is set=0.61, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.61
[LightGBM] [Warning] min_data_in_leaf is set=82, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=82
[LightGBM] [Warning] bagging_fraction is set=0.57, subsample=1.0 will be ignored. Current value: bagging_fraction=0.57
[LightGBM] [Warning] bagging_freq is set=21, subsample_freq=0 will be ignored. Current value: bagging_freq=21
[LightGBM] [Warning] feature_fraction is set=0.61, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.61
[LightGBM] [Warning] min_data_in_leaf is set=82, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=82
[LightGBM] [Warning] bagging_fraction is set=0.57, subsample=1.0 will be ignored. Current value: bagging_fraction=0.57
[LightGBM] [Warning] bagging_freq is set=21, subsample_freq=0 will be ignored. Current value: bagging_freq=21
[LightGBM] [Warning] feature_fraction is set=0.61, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.61
[LightGBM] [Warning] min_data_in_leaf is set=82, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=82
[LightGBM] [Warning] bagging_fraction is set=0.57, subsample=1.0 will be ignored. Current value: bagging_fraction=0.57
[LightGBM] [Warning] bagging_freq is set=21, subsample_freq=0 will be ignored. Current value: bagging_freq=21
[LightGBM] [Warning] feature_fraction is set=0.61, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.61
[LightGBM] [Warning] min_data_in_leaf is set=82, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=82
[LightGBM] [Warning] bagging_fraction is set=0.57, subsample=1.0 will be ignored. Current value: bagging_fraction=0.57
[LightGBM] [Warning] bagging_freq is set=21, subsample_freq=0 will be ignored. Current value: bagging_freq=21
[LightGBM] [Warning] feature_fraction is set=0.61, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.61
[LightGBM] [Warning] min_data_in_leaf is set=82, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=82
[LightGBM] [Warning] bagging_fraction is set=0.57, subsample=1.0 will be ignored. Current value: bagging_fraction=0.57
[LightGBM] [Warning] bagging_freq is set=21, subsample_freq=0 will be ignored. Current value: bagging_freq=21
| [0m 4 [0m | [0m 0.9852 [0m | [0m 0.5688 [0m | [0m 21.72 [0m | [0m 0.6147 [0m | [0m 11.47 [0m | [0m 3.806 [0m | [0m 82.57 [0m | [0m 0.7054 [0m | [0m 56.34 [0m | [0m 8.745 [0m | [0m 3.569 [0m |
[LightGBM] [Warning] feature_fraction is set=0.51, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.51
[LightGBM] [Warning] min_data_in_leaf is set=13, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=13
[LightGBM] [Warning] bagging_fraction is set=0.9, subsample=1.0 will be ignored. Current value: bagging_fraction=0.9
[LightGBM] [Warning] bagging_freq is set=69, subsample_freq=0 will be ignored. Current value: bagging_freq=69
[LightGBM] [Warning] feature_fraction is set=0.51, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.51
[LightGBM] [Warning] min_data_in_leaf is set=13, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=13
[LightGBM] [Warning] bagging_fraction is set=0.9, subsample=1.0 will be ignored. Current value: bagging_fraction=0.9
[LightGBM] [Warning] bagging_freq is set=69, subsample_freq=0 will be ignored. Current value: bagging_freq=69
[LightGBM] [Warning] feature_fraction is set=0.51, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.51
[LightGBM] [Warning] min_data_in_leaf is set=13, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=13
[LightGBM] [Warning] bagging_fraction is set=0.9, subsample=1.0 will be ignored. Current value: bagging_fraction=0.9
[LightGBM] [Warning] bagging_freq is set=69, subsample_freq=0 will be ignored. Current value: bagging_freq=69
[LightGBM] [Warning] feature_fraction is set=0.51, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.51
[LightGBM] [Warning] min_data_in_leaf is set=13, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=13
[LightGBM] [Warning] bagging_fraction is set=0.9, subsample=1.0 will be ignored. Current value: bagging_fraction=0.9
[LightGBM] [Warning] bagging_freq is set=69, subsample_freq=0 will be ignored. Current value: bagging_freq=69
[LightGBM] [Warning] feature_fraction is set=0.51, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.51
[LightGBM] [Warning] min_data_in_leaf is set=13, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=13
[LightGBM] [Warning] bagging_fraction is set=0.9, subsample=1.0 will be ignored. Current value: bagging_fraction=0.9
[LightGBM] [Warning] bagging_freq is set=69, subsample_freq=0 will be ignored. Current value: bagging_freq=69
| [0m 5 [0m | [0m 0.9867 [0m | [0m 0.9022 [0m | [0m 69.5 [0m | [0m 0.5116 [0m | [0m 12.14 [0m | [0m 0.1771 [0m | [0m 13.8 [0m | [0m 0.2258 [0m | [0m 131.4 [0m | [0m 9.54 [0m | [0m 6.79 [0m |
[LightGBM] [Warning] feature_fraction is set=0.72, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.72
[LightGBM] [Warning] min_data_in_leaf is set=74, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=74
[LightGBM] [Warning] bagging_fraction is set=0.91, subsample=1.0 will be ignored. Current value: bagging_fraction=0.91
[LightGBM] [Warning] bagging_freq is set=7, subsample_freq=0 will be ignored. Current value: bagging_freq=7
[LightGBM] [Warning] feature_fraction is set=0.72, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.72
[LightGBM] [Warning] min_data_in_leaf is set=74, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=74
[LightGBM] [Warning] bagging_fraction is set=0.91, subsample=1.0 will be ignored. Current value: bagging_fraction=0.91
[LightGBM] [Warning] bagging_freq is set=7, subsample_freq=0 will be ignored. Current value: bagging_freq=7
[LightGBM] [Warning] feature_fraction is set=0.72, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.72
[LightGBM] [Warning] min_data_in_leaf is set=74, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=74
[LightGBM] [Warning] bagging_fraction is set=0.91, subsample=1.0 will be ignored. Current value: bagging_fraction=0.91
[LightGBM] [Warning] bagging_freq is set=7, subsample_freq=0 will be ignored. Current value: bagging_freq=7
[LightGBM] [Warning] feature_fraction is set=0.72, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.72
[LightGBM] [Warning] min_data_in_leaf is set=74, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=74
[LightGBM] [Warning] bagging_fraction is set=0.91, subsample=1.0 will be ignored. Current value: bagging_fraction=0.91
[LightGBM] [Warning] bagging_freq is set=7, subsample_freq=0 will be ignored. Current value: bagging_freq=7
[LightGBM] [Warning] feature_fraction is set=0.72, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.72
[LightGBM] [Warning] min_data_in_leaf is set=74, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=74
[LightGBM] [Warning] bagging_fraction is set=0.91, subsample=1.0 will be ignored. Current value: bagging_fraction=0.91
[LightGBM] [Warning] bagging_freq is set=7, subsample_freq=0 will be ignored. Current value: bagging_freq=7
| [0m 6 [0m | [0m 0.9867 [0m | [0m 0.9075 [0m | [0m 7.949 [0m | [0m 0.7221 [0m | [0m 3.142 [0m | [0m 7.502 [0m | [0m 74.29 [0m | [0m 0.7841 [0m | [0m 172.2 [0m | [0m 3.37 [0m | [0m 5.37 [0m |
[LightGBM] [Warning] feature_fraction is set=0.97, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.97
[LightGBM] [Warning] min_data_in_leaf is set=11, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=11
[LightGBM] [Warning] bagging_fraction is set=0.65, subsample=1.0 will be ignored. Current value: bagging_fraction=0.65
[LightGBM] [Warning] bagging_freq is set=23, subsample_freq=0 will be ignored. Current value: bagging_freq=23
[LightGBM] [Warning] feature_fraction is set=0.97, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.97
[LightGBM] [Warning] min_data_in_leaf is set=11, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=11
[LightGBM] [Warning] bagging_fraction is set=0.65, subsample=1.0 will be ignored. Current value: bagging_fraction=0.65
[LightGBM] [Warning] bagging_freq is set=23, subsample_freq=0 will be ignored. Current value: bagging_freq=23
[LightGBM] [Warning] feature_fraction is set=0.97, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.97
[LightGBM] [Warning] min_data_in_leaf is set=11, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=11
[LightGBM] [Warning] bagging_fraction is set=0.65, subsample=1.0 will be ignored. Current value: bagging_fraction=0.65
[LightGBM] [Warning] bagging_freq is set=23, subsample_freq=0 will be ignored. Current value: bagging_freq=23
[LightGBM] [Warning] feature_fraction is set=0.97, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.97
[LightGBM] [Warning] min_data_in_leaf is set=11, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=11
[LightGBM] [Warning] bagging_fraction is set=0.65, subsample=1.0 will be ignored. Current value: bagging_fraction=0.65
[LightGBM] [Warning] bagging_freq is set=23, subsample_freq=0 will be ignored. Current value: bagging_freq=23
[LightGBM] [Warning] feature_fraction is set=0.97, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.97
[LightGBM] [Warning] min_data_in_leaf is set=11, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=11
[LightGBM] [Warning] bagging_fraction is set=0.65, subsample=1.0 will be ignored. Current value: bagging_fraction=0.65
[LightGBM] [Warning] bagging_freq is set=23, subsample_freq=0 will be ignored. Current value: bagging_freq=23
| [95m 7 [0m | [95m 0.9881 [0m | [95m 0.6506 [0m | [95m 23.32 [0m | [95m 0.9719 [0m | [95m 10.45 [0m | [95m 8.121 [0m | [95m 11.57 [0m | [95m 0.1104 [0m | [95m 56.87 [0m | [95m 1.552 [0m | [95m 3.447 [0m |
[LightGBM] [Warning] feature_fraction is set=0.5, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.5
[LightGBM] [Warning] min_data_in_leaf is set=14, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=14
[LightGBM] [Warning] bagging_fraction is set=0.9, subsample=1.0 will be ignored. Current value: bagging_fraction=0.9
[LightGBM] [Warning] bagging_freq is set=15, subsample_freq=0 will be ignored. Current value: bagging_freq=15
[LightGBM] [Warning] feature_fraction is set=0.5, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.5
[LightGBM] [Warning] min_data_in_leaf is set=14, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=14
[LightGBM] [Warning] bagging_fraction is set=0.9, subsample=1.0 will be ignored. Current value: bagging_fraction=0.9
[LightGBM] [Warning] bagging_freq is set=15, subsample_freq=0 will be ignored. Current value: bagging_freq=15
[LightGBM] [Warning] feature_fraction is set=0.5, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.5
[LightGBM] [Warning] min_data_in_leaf is set=14, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=14
[LightGBM] [Warning] bagging_fraction is set=0.9, subsample=1.0 will be ignored. Current value: bagging_fraction=0.9
[LightGBM] [Warning] bagging_freq is set=15, subsample_freq=0 will be ignored. Current value: bagging_freq=15
[LightGBM] [Warning] feature_fraction is set=0.5, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.5
[LightGBM] [Warning] min_data_in_leaf is set=14, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=14
[LightGBM] [Warning] bagging_fraction is set=0.9, subsample=1.0 will be ignored. Current value: bagging_fraction=0.9
[LightGBM] [Warning] bagging_freq is set=15, subsample_freq=0 will be ignored. Current value: bagging_freq=15
[LightGBM] [Warning] feature_fraction is set=0.5, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.5
[LightGBM] [Warning] min_data_in_leaf is set=14, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=14
[LightGBM] [Warning] bagging_fraction is set=0.9, subsample=1.0 will be ignored. Current value: bagging_fraction=0.9
[LightGBM] [Warning] bagging_freq is set=15, subsample_freq=0 will be ignored. Current value: bagging_freq=15
| [0m 8 [0m | [0m 0.9865 [0m | [0m 0.9027 [0m | [0m 15.11 [0m | [0m 0.5037 [0m | [0m 4.505 [0m | [0m 8.569 [0m | [0m 14.23 [0m | [0m 0.3022 [0m | [0m 35.7 [0m | [0m 7.793 [0m | [0m 0.4155 [0m |
[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.8
[LightGBM] [Warning] min_data_in_leaf is set=12, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=12
[LightGBM] [Warning] bagging_fraction is set=0.8, subsample=1.0 will be ignored. Current value: bagging_fraction=0.8
[LightGBM] [Warning] bagging_freq is set=18, subsample_freq=0 will be ignored. Current value: bagging_freq=18
[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.8
[LightGBM] [Warning] min_data_in_leaf is set=12, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=12
[LightGBM] [Warning] bagging_fraction is set=0.8, subsample=1.0 will be ignored. Current value: bagging_fraction=0.8
[LightGBM] [Warning] bagging_freq is set=18, subsample_freq=0 will be ignored. Current value: bagging_freq=18
[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.8
[LightGBM] [Warning] min_data_in_leaf is set=12, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=12
[LightGBM] [Warning] bagging_fraction is set=0.8, subsample=1.0 will be ignored. Current value: bagging_fraction=0.8
[LightGBM] [Warning] bagging_freq is set=18, subsample_freq=0 will be ignored. Current value: bagging_freq=18
[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.8
[LightGBM] [Warning] min_data_in_leaf is set=12, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=12
[LightGBM] [Warning] bagging_fraction is set=0.8, subsample=1.0 will be ignored. Current value: bagging_fraction=0.8
[LightGBM] [Warning] bagging_freq is set=18, subsample_freq=0 will be ignored. Current value: bagging_freq=18
[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.8
[LightGBM] [Warning] min_data_in_leaf is set=12, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=12
[LightGBM] [Warning] bagging_fraction is set=0.8, subsample=1.0 will be ignored. Current value: bagging_fraction=0.8
[LightGBM] [Warning] bagging_freq is set=18, subsample_freq=0 will be ignored. Current value: bagging_freq=18
| [0m 9 [0m | [0m 0.9881 [0m | [0m 0.8046 [0m | [0m 18.11 [0m | [0m 0.8046 [0m | [0m 3.713 [0m | [0m 6.977 [0m | [0m 12.95 [0m | [0m 0.3034 [0m | [0m 62.43 [0m | [0m 0.8685 [0m | [0m 9.597 [0m |
[LightGBM] [Warning] feature_fraction is set=0.74, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.74
[LightGBM] [Warning] min_data_in_leaf is set=13, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=13
[LightGBM] [Warning] bagging_fraction is set=0.79, subsample=1.0 will be ignored. Current value: bagging_fraction=0.79
[LightGBM] [Warning] bagging_freq is set=25, subsample_freq=0 will be ignored. Current value: bagging_freq=25
[LightGBM] [Warning] feature_fraction is set=0.74, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.74
[LightGBM] [Warning] min_data_in_leaf is set=13, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=13
[LightGBM] [Warning] bagging_fraction is set=0.79, subsample=1.0 will be ignored. Current value: bagging_fraction=0.79
[LightGBM] [Warning] bagging_freq is set=25, subsample_freq=0 will be ignored. Current value: bagging_freq=25
[LightGBM] [Warning] feature_fraction is set=0.74, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.74
[LightGBM] [Warning] min_data_in_leaf is set=13, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=13
[LightGBM] [Warning] bagging_fraction is set=0.79, subsample=1.0 will be ignored. Current value: bagging_fraction=0.79
[LightGBM] [Warning] bagging_freq is set=25, subsample_freq=0 will be ignored. Current value: bagging_freq=25
[LightGBM] [Warning] feature_fraction is set=0.74, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.74
[LightGBM] [Warning] min_data_in_leaf is set=13, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=13
[LightGBM] [Warning] bagging_fraction is set=0.79, subsample=1.0 will be ignored. Current value: bagging_fraction=0.79
[LightGBM] [Warning] bagging_freq is set=25, subsample_freq=0 will be ignored. Current value: bagging_freq=25
[LightGBM] [Warning] feature_fraction is set=0.74, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.74
[LightGBM] [Warning] min_data_in_leaf is set=13, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=13
[LightGBM] [Warning] bagging_fraction is set=0.79, subsample=1.0 will be ignored. Current value: bagging_fraction=0.79
[LightGBM] [Warning] bagging_freq is set=25, subsample_freq=0 will be ignored. Current value: bagging_freq=25
| [0m 10 [0m | [0m 0.986 [0m | [0m 0.7898 [0m | [0m 25.66 [0m | [0m 0.7436 [0m | [0m 9.197 [0m | [0m 7.111 [0m | [0m 13.84 [0m | [0m 0.9618 [0m | [0m 54.75 [0m | [0m 7.634 [0m | [0m 2.996 [0m |
[LightGBM] [Warning] feature_fraction is set=0.68, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.68
[LightGBM] [Warning] min_data_in_leaf is set=23, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=23
[LightGBM] [Warning] bagging_fraction is set=0.53, subsample=1.0 will be ignored. Current value: bagging_fraction=0.53
[LightGBM] [Warning] bagging_freq is set=21, subsample_freq=0 will be ignored. Current value: bagging_freq=21
[LightGBM] [Warning] feature_fraction is set=0.68, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.68
[LightGBM] [Warning] min_data_in_leaf is set=23, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=23
[LightGBM] [Warning] bagging_fraction is set=0.53, subsample=1.0 will be ignored. Current value: bagging_fraction=0.53
[LightGBM] [Warning] bagging_freq is set=21, subsample_freq=0 will be ignored. Current value: bagging_freq=21
[LightGBM] [Warning] feature_fraction is set=0.68, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.68
[LightGBM] [Warning] min_data_in_leaf is set=23, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=23
[LightGBM] [Warning] bagging_fraction is set=0.53, subsample=1.0 will be ignored. Current value: bagging_fraction=0.53
[LightGBM] [Warning] bagging_freq is set=21, subsample_freq=0 will be ignored. Current value: bagging_freq=21
[LightGBM] [Warning] feature_fraction is set=0.68, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.68
[LightGBM] [Warning] min_data_in_leaf is set=23, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=23
[LightGBM] [Warning] bagging_fraction is set=0.53, subsample=1.0 will be ignored. Current value: bagging_fraction=0.53
[LightGBM] [Warning] bagging_freq is set=21, subsample_freq=0 will be ignored. Current value: bagging_freq=21
[LightGBM] [Warning] feature_fraction is set=0.68, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.68
[LightGBM] [Warning] min_data_in_leaf is set=23, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=23
[LightGBM] [Warning] bagging_fraction is set=0.53, subsample=1.0 will be ignored. Current value: bagging_fraction=0.53
[LightGBM] [Warning] bagging_freq is set=21, subsample_freq=0 will be ignored. Current value: bagging_freq=21
| [0m 11 [0m | [0m 0.9876 [0m | [0m 0.5345 [0m | [0m 21.17 [0m | [0m 0.6788 [0m | [0m 3.73 [0m | [0m 0.8905 [0m | [0m 23.36 [0m | [0m 0.5114 [0m | [0m 26.26 [0m | [0m 0.5502 [0m | [0m 0.4701 [0m |
[LightGBM] [Warning] feature_fraction is set=0.5, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.5
[LightGBM] [Warning] min_data_in_leaf is set=28, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=28
[LightGBM] [Warning] bagging_fraction is set=0.94, subsample=1.0 will be ignored. Current value: bagging_fraction=0.94
[LightGBM] [Warning] bagging_freq is set=94, subsample_freq=0 will be ignored. Current value: bagging_freq=94
[LightGBM] [Warning] feature_fraction is set=0.5, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.5
[LightGBM] [Warning] min_data_in_leaf is set=28, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=28
[LightGBM] [Warning] bagging_fraction is set=0.94, subsample=1.0 will be ignored. Current value: bagging_fraction=0.94
[LightGBM] [Warning] bagging_freq is set=94, subsample_freq=0 will be ignored. Current value: bagging_freq=94
[LightGBM] [Warning] feature_fraction is set=0.5, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.5
[LightGBM] [Warning] min_data_in_leaf is set=28, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=28
[LightGBM] [Warning] bagging_fraction is set=0.94, subsample=1.0 will be ignored. Current value: bagging_fraction=0.94
[LightGBM] [Warning] bagging_freq is set=94, subsample_freq=0 will be ignored. Current value: bagging_freq=94
[LightGBM] [Warning] feature_fraction is set=0.5, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.5
[LightGBM] [Warning] min_data_in_leaf is set=28, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=28
[LightGBM] [Warning] bagging_fraction is set=0.94, subsample=1.0 will be ignored. Current value: bagging_fraction=0.94
[LightGBM] [Warning] bagging_freq is set=94, subsample_freq=0 will be ignored. Current value: bagging_freq=94
[LightGBM] [Warning] feature_fraction is set=0.5, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.5
[LightGBM] [Warning] min_data_in_leaf is set=28, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=28
[LightGBM] [Warning] bagging_fraction is set=0.94, subsample=1.0 will be ignored. Current value: bagging_fraction=0.94
[LightGBM] [Warning] bagging_freq is set=94, subsample_freq=0 will be ignored. Current value: bagging_freq=94
| [0m 12 [0m | [0m 0.9881 [0m | [0m 0.9372 [0m | [0m 94.21 [0m | [0m 0.5011 [0m | [0m 16.17 [0m | [0m 5.918 [0m | [0m 28.96 [0m | [0m 0.04709 [0m | [0m 95.39 [0m | [0m 2.685 [0m | [0m 5.334 [0m |
[LightGBM] [Warning] feature_fraction is set=0.94, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.94
[LightGBM] [Warning] min_data_in_leaf is set=14, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=14
[LightGBM] [Warning] bagging_fraction is set=0.87, subsample=1.0 will be ignored. Current value: bagging_fraction=0.87
[LightGBM] [Warning] bagging_freq is set=27, subsample_freq=0 will be ignored. Current value: bagging_freq=27
[LightGBM] [Warning] feature_fraction is set=0.94, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.94
[LightGBM] [Warning] min_data_in_leaf is set=14, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=14
[LightGBM] [Warning] bagging_fraction is set=0.87, subsample=1.0 will be ignored. Current value: bagging_fraction=0.87
[LightGBM] [Warning] bagging_freq is set=27, subsample_freq=0 will be ignored. Current value: bagging_freq=27
[LightGBM] [Warning] feature_fraction is set=0.94, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.94
[LightGBM] [Warning] min_data_in_leaf is set=14, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=14
[LightGBM] [Warning] bagging_fraction is set=0.87, subsample=1.0 will be ignored. Current value: bagging_fraction=0.87
[LightGBM] [Warning] bagging_freq is set=27, subsample_freq=0 will be ignored. Current value: bagging_freq=27
[LightGBM] [Warning] feature_fraction is set=0.94, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.94
[LightGBM] [Warning] min_data_in_leaf is set=14, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=14
[LightGBM] [Warning] bagging_fraction is set=0.87, subsample=1.0 will be ignored. Current value: bagging_fraction=0.87
[LightGBM] [Warning] bagging_freq is set=27, subsample_freq=0 will be ignored. Current value: bagging_freq=27
[LightGBM] [Warning] feature_fraction is set=0.94, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.94
[LightGBM] [Warning] min_data_in_leaf is set=14, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=14
[LightGBM] [Warning] bagging_fraction is set=0.87, subsample=1.0 will be ignored. Current value: bagging_fraction=0.87
[LightGBM] [Warning] bagging_freq is set=27, subsample_freq=0 will be ignored. Current value: bagging_freq=27
| [0m 13 [0m | [0m 0.9872 [0m | [0m 0.871 [0m | [0m 27.94 [0m | [0m 0.9419 [0m | [0m 18.65 [0m | [0m 2.069 [0m | [0m 14.21 [0m | [0m 0.6722 [0m | [0m 61.81 [0m | [0m 2.236 [0m | [0m 2.226 [0m |
[LightGBM] [Warning] feature_fraction is set=0.71, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.71
[LightGBM] [Warning] min_data_in_leaf is set=32, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=32
[LightGBM] [Warning] bagging_fraction is set=0.82, subsample=1.0 will be ignored. Current value: bagging_fraction=0.82
[LightGBM] [Warning] bagging_freq is set=95, subsample_freq=0 will be ignored. Current value: bagging_freq=95
[LightGBM] [Warning] feature_fraction is set=0.71, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.71
[LightGBM] [Warning] min_data_in_leaf is set=32, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=32
[LightGBM] [Warning] bagging_fraction is set=0.82, subsample=1.0 will be ignored. Current value: bagging_fraction=0.82
[LightGBM] [Warning] bagging_freq is set=95, subsample_freq=0 will be ignored. Current value: bagging_freq=95
[LightGBM] [Warning] feature_fraction is set=0.71, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.71
[LightGBM] [Warning] min_data_in_leaf is set=32, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=32
[LightGBM] [Warning] bagging_fraction is set=0.82, subsample=1.0 will be ignored. Current value: bagging_fraction=0.82
[LightGBM] [Warning] bagging_freq is set=95, subsample_freq=0 will be ignored. Current value: bagging_freq=95
[LightGBM] [Warning] feature_fraction is set=0.71, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.71
[LightGBM] [Warning] min_data_in_leaf is set=32, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=32
[LightGBM] [Warning] bagging_fraction is set=0.82, subsample=1.0 will be ignored. Current value: bagging_fraction=0.82
[LightGBM] [Warning] bagging_freq is set=95, subsample_freq=0 will be ignored. Current value: bagging_freq=95
[LightGBM] [Warning] feature_fraction is set=0.71, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.71
[LightGBM] [Warning] min_data_in_leaf is set=32, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=32
[LightGBM] [Warning] bagging_fraction is set=0.82, subsample=1.0 will be ignored. Current value: bagging_fraction=0.82
[LightGBM] [Warning] bagging_freq is set=95, subsample_freq=0 will be ignored. Current value: bagging_freq=95
| [0m 14 [0m | [0m 0.9877 [0m | [0m 0.8161 [0m | [0m 95.06 [0m | [0m 0.7065 [0m | [0m 16.61 [0m | [0m 5.964 [0m | [0m 32.13 [0m | [0m 0.2743 [0m | [0m 90.09 [0m | [0m 2.749 [0m | [0m 8.467 [0m |
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[LightGBM] [Warning] min_data_in_leaf is set=27, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=27
[LightGBM] [Warning] bagging_fraction is set=0.62, subsample=1.0 will be ignored. Current value: bagging_fraction=0.62
[LightGBM] [Warning] bagging_freq is set=95, subsample_freq=0 will be ignored. Current value: bagging_freq=95
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[LightGBM] [Warning] min_data_in_leaf is set=27, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=27
[LightGBM] [Warning] bagging_fraction is set=0.62, subsample=1.0 will be ignored. Current value: bagging_fraction=0.62
[LightGBM] [Warning] bagging_freq is set=95, subsample_freq=0 will be ignored. Current value: bagging_freq=95
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[LightGBM] [Warning] min_data_in_leaf is set=27, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=27
[LightGBM] [Warning] bagging_fraction is set=0.62, subsample=1.0 will be ignored. Current value: bagging_fraction=0.62
[LightGBM] [Warning] bagging_freq is set=95, subsample_freq=0 will be ignored. Current value: bagging_freq=95
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[LightGBM] [Warning] min_data_in_leaf is set=27, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=27
[LightGBM] [Warning] bagging_fraction is set=0.62, subsample=1.0 will be ignored. Current value: bagging_fraction=0.62
[LightGBM] [Warning] bagging_freq is set=95, subsample_freq=0 will be ignored. Current value: bagging_freq=95
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[LightGBM] [Warning] min_data_in_leaf is set=27, min_child_samples=20 will be ignored. Current value: min_data_in_leaf=27
[LightGBM] [Warning] bagging_fraction is set=0.62, subsample=1.0 will be ignored. Current value: bagging_fraction=0.62
[LightGBM] [Warning] bagging_freq is set=95, subsample_freq=0 will be ignored. Current value: bagging_freq=95
| [0m 15 [0m | [0m 0.9864 [0m | [0m 0.6157 [0m | [0m 95.6 [0m | [0m 0.5999 [0m | [0m 14.13 [0m | [0m 8.518 [0m | [0m 27.99 [0m | [0m 0.2615 [0m | [0m 96.87 [0m | [0m 6.295 [0m | [0m 7.377 [0m |
=================================================================================================================================================
"""显示优化结果"""
bayes_lgb.max
{'target': 0.98811,
'params': {'bagging_fraction': 0.6505544185087316,
'bagging_freq': 23.317208046840786,
'feature_fraction': 0.9718811220117596,
'max_depth': 10.448372518021595,
'min_child_weight': 8.121202480695242,
'min_data_in_leaf': 11.569740165390415,
'min_split_gain': 0.11042859388856752,
'num_leaves': 56.870400765967844,
'reg_alpha': 1.551571676705884,
'reg_lambda': 3.4466437575649045}}
base_params_lgb = {
'boosting_type': 'gbdt',
'objective': 'multiclass',
'num_class': 4,
'learning_rate': 0.01,
'num_leaves': 56,
'max_depth': 10,
'min_child_weight':8.1,
'bagging_fraction': 0.65,
'feature_fraction': 0.97,
'bagging_freq': 23,
'reg_lambda': 3.4,
'reg_alpha': 1.55,
'min_split_gain': 0.11,
'nthread': 10,
'verbose': -1,
}
cv_result_lgb = lgb.cv(
train_set=train_matrix,
early_stopping_rounds=1000,
num_boost_round=20000,
nfold=5,
stratified=True,
shuffle=True,
params=base_params_lgb,
feval=f1_score_vali,
seed=0
)
print('迭代次数{}'.format(len(cv_result_lgb['f1_score-mean'])))
print('最终模型的f1为{}'.format(max(cv_result_lgb['f1_score-mean'])))
迭代次数3427
最终模型的f1为0.9642989128292301
import lightgbm as lgb
"""使用lightgbm 5折交叉验证进行建模预测"""
test = np.zeros((test_features.shape[0],4))
cv_scores = []
for i, (train_index, valid_index) in enumerate(kf.split(train_features_filtered, data_train_label)):
print('************************************ {} ************************************'.format(str(i+1)))
X_train_split, y_train_split, X_val, y_val = train_features_filtered.iloc[train_index], data_train_label[train_index], train_features_filtered.iloc[valid_index], data_train_label[valid_index]
train_matrix = lgb.Dataset(X_train_split, label=y_train_split)
valid_matrix = lgb.Dataset(X_val, label=y_val)
params = {
'boosting_type': 'gbdt',
'objective': 'multiclass',
'num_class': 4,
'learning_rate': 0.01,
'num_leaves': 56,
'max_depth': 10,
'min_child_weight':8.1,
'bagging_fraction': 0.65,
'feature_fraction': 0.97,
'bagging_freq': 23,
'reg_lambda': 3.4,
'reg_alpha': 1.55,
'min_split_gain': 0.11,
'nthread': 10,
'verbose': -1,
}
model = lgb.train(params, train_set=train_matrix, num_boost_round=4833, valid_sets=valid_matrix,
verbose_eval=1000, early_stopping_rounds=200, feval=f1_score_vali)
val_pred = model.predict(X_val, num_iteration=model.best_iteration)
test_pred = model.predict(test_features, num_iteration=model.best_iteration)
test += test_pred
val_pred = np.argmax(val_pred, axis=1)
cv_scores.append(f1_score(y_true=y_val, y_pred=val_pred, average='macro'))
print(cv_scores)
test=test/kf.n_splits
print("lgb_scotrainre_list:{}".format(cv_scores))
print("lgb_score_mean:{}".format(np.mean(cv_scores)))
print("lgb_score_std:{}".format(np.std(cv_scores)))
print("预测的概率矩阵为:")
print(test)
************************************ 1 ************************************
Training until validation scores don't improve for 200 rounds
[1000] valid_0's multi_logloss: 0.0413472 valid_0's f1_score: 0.962166
Early stopping, best iteration is:
[1011] valid_0's multi_logloss: 0.0412463 valid_0's f1_score: 0.962512
[0.9625123412824288]
************************************ 2 ************************************
Training until validation scores don't improve for 200 rounds
[1000] valid_0's multi_logloss: 0.0424555 valid_0's f1_score: 0.964571
Early stopping, best iteration is:
[983] valid_0's multi_logloss: 0.04259 valid_0's f1_score: 0.964617
[0.9625123412824288, 0.9646166618110472]
************************************ 3 ************************************
Training until validation scores don't improve for 200 rounds
[1000] valid_0's multi_logloss: 0.0374029 valid_0's f1_score: 0.965251
Early stopping, best iteration is:
[818] valid_0's multi_logloss: 0.0388073 valid_0's f1_score: 0.965592
[0.9625123412824288, 0.9646166618110472, 0.9655918274926425]
************************************ 4 ************************************
Training until validation scores don't improve for 200 rounds
[1000] valid_0's multi_logloss: 0.043221 valid_0's f1_score: 0.961963
Early stopping, best iteration is:
[1607] valid_0's multi_logloss: 0.0414588 valid_0's f1_score: 0.963825
[0.9625123412824288, 0.9646166618110472, 0.9655918274926425, 0.9638248555571988]
************************************ 5 ************************************
Training until validation scores don't improve for 200 rounds
[1000] valid_0's multi_logloss: 0.0385583 valid_0's f1_score: 0.962585
Early stopping, best iteration is:
[1436] valid_0's multi_logloss: 0.0369731 valid_0's f1_score: 0.964289
[0.9625123412824288, 0.9646166618110472, 0.9655918274926425, 0.9638248555571988, 0.9642893171331153]
lgb_scotrainre_list:[0.9625123412824288, 0.9646166618110472, 0.9655918274926425, 0.9638248555571988, 0.9642893171331153]
lgb_score_mean:0.9641670006552865
lgb_score_std:0.001010173143627339
预测的概率矩阵为:
[[9.98314727e-01 1.40585661e-03 1.47624009e-04 1.31792696e-04]
[2.27630608e-04 3.03358137e-04 9.99405658e-01 6.33531716e-05]
[7.48383528e-05 7.07481582e-05 1.40625239e-04 9.99713788e-01]
...
[1.47590909e-01 3.09707532e-04 8.51737404e-01 3.61979245e-04]
[9.99289198e-01 5.52825330e-04 1.15507823e-04 4.24687904e-05]
[9.25838547e-01 7.64934032e-03 1.96985548e-02 4.68135575e-02]]
temp=pd.DataFrame(test)
temp
0 | 1 | 2 | 3 | |
---|---|---|---|---|
0 | 0.998315 | 0.001406 | 0.000148 | 0.000132 |
1 | 0.000228 | 0.000303 | 0.999406 | 0.000063 |
2 | 0.000075 | 0.000071 | 0.000141 | 0.999714 |
3 | 0.999356 | 0.000438 | 0.000151 | 0.000055 |
4 | 0.999585 | 0.000087 | 0.000202 | 0.000126 |
... | ... | ... | ... | ... |
19995 | 0.971311 | 0.012828 | 0.000886 | 0.014975 |
19996 | 0.996253 | 0.002295 | 0.001206 | 0.000246 |
19997 | 0.147591 | 0.000310 | 0.851737 | 0.000362 |
19998 | 0.999289 | 0.000553 | 0.000116 | 0.000042 |
19999 | 0.925839 | 0.007649 | 0.019699 | 0.046814 |
20000 rows × 4 columns
result=pd.read_csv('sample_submit.csv')
result['label_0']=temp[0]
result['label_1']=temp[1]
result['label_2']=temp[2]
result['label_3']=temp[3]
result.to_csv('submit_v3_0.csv',index=False)
## 结果优化,概率较高的直接设为1,较低的直接设为0
def prob_opt(lgb_test,prob):
for index,row in enumerate(lgb_test):
row_max = max(row)
if row_max > prob:
for i in range(4):
if row[i] > prob:
lgb_test[index,i] = 1
else:
lgb_test[index,i] = 0
return lgb_test
test = prob_opt(test,0.8)
temp=pd.DataFrame(test)
temp
0 | 1 | 2 | 3 | |
---|---|---|---|---|
0 | 1.0 | 0.0 | 0.0 | 0.0 |
1 | 0.0 | 0.0 | 1.0 | 0.0 |
2 | 0.0 | 0.0 | 0.0 | 1.0 |
3 | 1.0 | 0.0 | 0.0 | 0.0 |
4 | 1.0 | 0.0 | 0.0 | 0.0 |
... | ... | ... | ... | ... |
19995 | 1.0 | 0.0 | 0.0 | 0.0 |
19996 | 1.0 | 0.0 | 0.0 | 0.0 |
19997 | 0.0 | 0.0 | 1.0 | 0.0 |
19998 | 1.0 | 0.0 | 0.0 | 0.0 |
19999 | 1.0 | 0.0 | 0.0 | 0.0 |
20000 rows × 4 columns
result=pd.read_csv('sample_submit.csv')
result['label_0']=temp[0]
result['label_1']=temp[1]
result['label_2']=temp[2]
result['label_3']=temp[3]
result.to_csv('submit_v3_1_80.csv',index=False)