Automatic hyperparameter tuning for LightGBM regression

The snippets below use Optuna to search regression hyperparameters by minimizing RMSE on a hold-out split, first with LightGBM's LGBMRegressor and then with an equivalent XGBoost objective, and finally visualize the results of the search.

from lightgbm import LGBMRegressor
import xgboost as xgb
import optuna
import optuna.integration.lightgbm as oplgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, KFold

def objective(trial):
    # data / target: feature matrix and target column prepared beforehand
    X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.3)  # train/test split
    param = {
        'metric': 'rmse',
        'random_state': 48,
        'n_estimators': 20000,
        'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-3, 10.0),
        'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-3, 10.0),
        'colsample_bytree': trial.suggest_categorical('colsample_bytree', [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]),
        'subsample': trial.suggest_categorical('subsample', [0.4, 0.5, 0.6, 0.7, 0.8, 1.0]),
        'learning_rate': trial.suggest_categorical('learning_rate', [0.006, 0.008, 0.01, 0.014, 0.017, 0.02]),
        'max_depth': trial.suggest_categorical('max_depth', [5, 7, 9, 11, 13, 15, 17, 20, 50]),
        'num_leaves': trial.suggest_int('num_leaves', 1, 1000),
        'min_child_samples': trial.suggest_int('min_child_samples', 1, 300),
        'cat_smooth': trial.suggest_int('cat_smooth', 1, 100)
    }

    lgb = LGBMRegressor(**param)
    lgb.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=100, verbose=False)
    pred_lgb = lgb.predict(X_test)
    rmse = mean_squared_error(y_test, pred_lgb, squared=False)
    return rmse
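
KFold is imported above but never used. As a variant, the hold-out split inside the objective can be replaced by cross-validation, which gives a less noisy RMSE estimate at the cost of extra fit time. A minimal sketch, assuming the same data / target variables and the same (older) LightGBM and scikit-learn APIs used above (early_stopping_rounds / verbose in fit, squared=False):

def objective_cv(trial):
    # Hypothetical cross-validated variant of the objective above.
    param = {
        'metric': 'rmse',
        'random_state': 48,
        'n_estimators': 20000,
        'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-3, 10.0),
        'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-3, 10.0),
        'learning_rate': trial.suggest_categorical('learning_rate', [0.006, 0.008, 0.01, 0.014, 0.017, 0.02]),
        'num_leaves': trial.suggest_int('num_leaves', 1, 1000),
    }
    scores = []
    for train_idx, val_idx in KFold(n_splits=5, shuffle=True, random_state=48).split(data):
        X_tr, X_val = data.iloc[train_idx], data.iloc[val_idx]
        y_tr, y_val = target.iloc[train_idx], target.iloc[val_idx]
        model = LGBMRegressor(**param)
        model.fit(X_tr, y_tr, eval_set=[(X_val, y_val)], early_stopping_rounds=100, verbose=False)
        scores.append(mean_squared_error(y_val, model.predict(X_val), squared=False))
    return sum(scores) / len(scores)  # mean RMSE over the folds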

The same search can be run for XGBoost; the objective below mirrors the LightGBM one with XGBRegressor's parameter names (note that it re-uses the name objective, so only the last definition is what the study below actually optimizes):

def objective(trial):
    data = train.iloc[:, :-1]
    target = train.target
    train_x, test_x, train_y, test_y = train_test_split(data, target, test_size=0.3, random_state=42)
    param = {
        'lambda': trial.suggest_loguniform('lambda', 1e-3, 10.0),
        'alpha': trial.suggest_loguniform('alpha', 1e-3, 10.0),
        'colsample_bytree': trial.suggest_categorical('colsample_bytree', [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]),
        'subsample': trial.suggest_categorical('subsample', [0.4, 0.5, 0.6, 0.7, 0.8, 1.0]),
        'learning_rate': trial.suggest_categorical('learning_rate',
                                                   [0.008, 0.009, 0.01, 0.012, 0.014, 0.016, 0.018, 0.02]),
        'n_estimators': 4000,
        'max_depth': trial.suggest_categorical('max_depth', [5, 7, 9, 11, 13, 15, 17, 20]),
        'random_state': trial.suggest_categorical('random_state', [24, 48, 2020]),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 300),
    }
    model = xgb.XGBRegressor(**param)
    model.fit(train_x, train_y, eval_set=[(test_x, test_y)], early_stopping_rounds=100, verbose=False)
    preds = model.predict(test_x)
    rmse = mean_squared_error(test_y, preds, squared=False)
    return rmse
study = optuna.create_study(direction='minimize')
n_trials = 1  # number of optimization trials; increase for a real search
study.optimize(objective, n_trials=n_trials)
print('Number of finished trials:', len(study.trials))
print('------------------------------------------------')
print('Best trial:', study.best_trial.params)
print('------------------------------------------------')
print(study.trials_dataframe())
print('------------------------------------------------')
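
Once the search has finished, the tuned parameters can be used to refit a final model. A minimal sketch, assuming the study optimized the XGBoost objective above and that train is still in scope (the fixed n_estimators is not part of best_params, so it is passed separately):

# Sketch: refit a final model on the full training data with the tuned parameters.
best_params = study.best_params
final_model = xgb.XGBRegressor(n_estimators=4000, **best_params)
final_model.fit(train.iloc[:, :-1], train.target)

Optuna's built-in visualizations below help inspect how the search itself behaved: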
# plot_optimization_history: shows how the best score evolved over the trials
optuna.visualization.plot_optimization_history(study).show()
# plot_parallel_coordinate: interactively visualizes the hyperparameters and scores
optuna.visualization.plot_parallel_coordinate(study).show()
# plot_slice: shows the evolution of the search, i.e. where in the hyperparameter space
# the search went and which parts of the space were explored more
optuna.visualization.plot_slice(study).show()
# plot_contour: pairwise contours of selected hyperparameters against the objective value
optuna.visualization.plot_contour(study, params=['alpha',
                                                 # 'max_depth',
                                                 'lambda',
                                                 'subsample',
                                                 'learning_rate']).show()
# plot_param_importances: visualize parameter importances
optuna.visualization.plot_param_importances(study).show()
# plot_edf: visualize the empirical distribution function of the objective value
optuna.visualization.plot_edf(study).show()
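
Finally, the optuna.integration.lightgbm module imported as oplgb at the top is never actually used above. It provides a stepwise tuner that can replace the hand-written objective for LightGBM. A rough sketch, assuming an Optuna/LightGBM version pair where this integration is available and an X_train / X_test split like the one made inside the first objective (exact arguments vary between versions):

import lightgbm

# Rough sketch of the stepwise LightGBM tuner (API details depend on the Optuna version).
dtrain = lightgbm.Dataset(X_train, label=y_train)
dval = lightgbm.Dataset(X_test, label=y_test, reference=dtrain)
params = {'objective': 'regression', 'metric': 'rmse', 'verbosity': -1}
booster = oplgb.train(params, dtrain, valid_sets=[dval])  # tunes key parameters step by step
print(booster.params)  # the parameters chosen by the tuner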
