Optimizing an XGBoost Model

This post walks through an example of using a Genetic Algorithm (GA), Particle Swarm Optimization (PSO), and Grey Wolf Optimization (GWO) to tune the hyperparameters of an XGBoost regression model, saving each optimized model to a file. Let's take it step by step.
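The examples rely on a few third-party packages. Judging from the imports below, an install command along these lines should cover them (package names are inferred from the imports, so check them against your environment):

pip install pandas numpy xgboost scikit-learn openpyxl geneticalgorithm pyswarms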

First, we implement the Genetic Algorithm (GA):

import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from geneticalgorithm import geneticalgorithm as ga

# Load the data
file_path = 'D:\\pythontest\\pythontest.xlsx'
data = pd.read_excel(file_path, engine='openpyxl')

# Preprocess the data
X = data.iloc[:, 2:]  # hyperspectral values as features
y = data.iloc[:, 1]   # Li content as the label

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Objective function: train XGBoost with the candidate parameters and return the test-set RMSE to minimize
def objective_function(params):
    learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma = params
    model = xgb.XGBRegressor(learning_rate=learning_rate,
                              max_depth=int(max_depth),
                              min_child_weight=min_child_weight,
                              subsample=subsample,
                              colsample_bytree=colsample_bytree,
                              gamma=gamma,
                              random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))  # squared=False was removed in scikit-learn 1.6
    return rmse

# Parameter bounds, in order: learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma
varbound = np.array([[0.01, 1], [1, 10], [1, 10], [0.1, 1], [0.1, 1], [0.1, 1]])

# Run the genetic algorithm (100 iterations x 100 individuals means up to 10,000 model fits)
algorithm_param = {'max_num_iteration': 100,
                   'population_size': 100,
                   'mutation_probability': 0.1,
                   'elit_ratio': 0.01,
                   'crossover_probability': 0.5,
                   'parents_portion': 0.3,
                   'crossover_type': 'uniform',
                   'max_iteration_without_improv': None}
ga_instance = ga(function=objective_function,
                 dimension=6,
                 variable_type='real',
                 variable_boundaries=varbound,
                 function_timeout=600,  # the library default of 10 s is too short for a full XGBoost fit
                 algorithm_parameters=algorithm_param)
ga_instance.run()

# Retrieve the best parameter vector found
best_params_ga = ga_instance.output_dict['variable']

# Retrain XGBoost with the best parameters
learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma = best_params_ga
best_model_ga = xgb.XGBRegressor(learning_rate=learning_rate,
                                  max_depth=int(max_depth),
                                  min_child_weight=min_child_weight,
                                  subsample=subsample,
                                  colsample_bytree=colsample_bytree,
                                  gamma=gamma,
                                  random_state=42)
best_model_ga.fit(X_train, y_train)

# Evaluate the model
y_pred_ga = best_model_ga.predict(X_test)
r2_ga = r2_score(y_test, y_pred_ga)
rmse_ga = np.sqrt(mean_squared_error(y_test, y_pred_ga))

print("遗传算法(GA) XGBoost 模型评估结果:")
print(f'R² score: {r2_ga}')
print(f'RMSE: {rmse_ga}')

# Save the model
best_model_ga.save_model("xgb_model_ga.json")
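To reuse a saved model later, it can be loaded back from the JSON file (a minimal sketch using XGBoost's load_model; the file name matches the one saved above):

# Reload the saved GA-tuned model and sanity-check a few predictions
loaded_ga = xgb.XGBRegressor()
loaded_ga.load_model("xgb_model_ga.json")
print(loaded_ga.predict(X_test.iloc[:5]))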

Next, we implement Particle Swarm Optimization (PSO):

import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from pyswarms.single.global_best import GlobalBestPSO

# Load the data
file_path = 'D:\\pythontest\\pythontest.xlsx'
data = pd.read_excel(file_path, engine='openpyxl')

# Preprocess the data
X = data.iloc[:, 2:]  # hyperspectral values as features
y = data.iloc[:, 1]   # Li content as the label

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Objective for pyswarms: the optimizer passes in the whole swarm at once,
# an array of shape (n_particles, dimensions), and expects one cost per particle
def single_objective(params):
    learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma = params
    model = xgb.XGBRegressor(learning_rate=learning_rate,
                             max_depth=int(max_depth),
                             min_child_weight=min_child_weight,
                             subsample=subsample,
                             colsample_bytree=colsample_bytree,
                             gamma=gamma,
                             random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return np.sqrt(mean_squared_error(y_test, y_pred))

def objective_function(swarm):
    return np.array([single_objective(particle) for particle in swarm])

# Parameter bounds as (lower, upper) arrays, in the same order the objective unpacks them
min_bound = np.array([0.01, 1, 1, 0.1, 0.1, 0.1])
max_bound = np.array([1, 10, 10, 1, 1, 1])
bounds = (min_bound, max_bound)

# Run particle swarm optimization; c1, c2, and w are the standard
# cognitive, social, and inertia coefficients, which pyswarms requires
options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
optimizer = GlobalBestPSO(n_particles=10, dimensions=6, options=options, bounds=bounds)
best_cost, best_params_pso = optimizer.optimize(objective_function, iters=10)

# Retrain XGBoost with the best parameters
learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma = best_params_pso
best_model_pso = xgb.XGBRegressor(learning_rate=learning_rate,
                                   max_depth=int(max_depth),
                                   min_child_weight=min_child_weight,
                                   subsample=subsample,
                                   colsample_bytree=colsample_bytree,
                                   gamma=gamma,
                                   random_state=42)
best_model_pso.fit(X_train, y_train)

# Evaluate the model
y_pred_pso = best_model_pso.predict(X_test)
r2_pso = r2_score(y_test, y_pred_pso)
rmse_pso = np.sqrt(mean_squared_error(y_test, y_pred_pso))

print("粒子群优化(PSO) XGBoost 模型评估结果:")
print(f'R² score: {r2_pso}')
print(f'RMSE: {rmse_pso}')

# Save the model
best_model_pso.save_model("xgb_model_pso.json")

Finally, Grey Wolf Optimization (GWO). Rather than depending on a third-party GWO package, the version below implements a minimal GWO search loop directly in NumPy (a from-scratch sketch of the standard alpha/beta/delta update, not a library call):

import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

# Load the data
file_path = 'D:\\pythontest\\pythontest.xlsx'
data = pd.read_excel(file_path, engine='openpyxl')

# Preprocess the data
X = data.iloc[:, 2:]  # hyperspectral values as features
y = data.iloc[:, 1]   # Li content as the label

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Objective function: train XGBoost with the candidate parameters and return the test-set RMSE to minimize
def objective_function(params):
    learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma = params
    model = xgb.XGBRegressor(learning_rate=learning_rate,
                             max_depth=int(max_depth),
                             min_child_weight=min_child_weight,
                             subsample=subsample,
                             colsample_bytree=colsample_bytree,
                             gamma=gamma,
                             random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return np.sqrt(mean_squared_error(y_test, y_pred))

# Parameter bounds, in order: learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma
lb = np.array([0.01, 1, 1, 0.1, 0.1, 0.1])
ub = np.array([1.0, 10, 10, 1.0, 1.0, 1.0])

# Minimal Grey Wolf Optimizer: the three best wolves (alpha, beta, delta)
# guide the rest of the pack, and the coefficient a decays from 2 to 0 to
# shift the search from exploration to exploitation
def grey_wolf_optimize(obj, lb, ub, n_wolves=20, n_iter=30, seed=42):
    rng = np.random.default_rng(seed)
    dim = len(lb)
    wolves = rng.uniform(lb, ub, size=(n_wolves, dim))
    fitness = np.array([obj(w) for w in wolves])
    order = np.argsort(fitness)
    alpha, beta, delta = (wolves[order[k]].copy() for k in range(3))
    f_alpha, f_beta, f_delta = fitness[order[:3]]
    for t in range(n_iter):
        a = 2 - 2 * t / n_iter
        for i in range(n_wolves):
            # Each leader pulls the wolf toward itself; the new position is the average
            new_pos = np.zeros(dim)
            for leader in (alpha, beta, delta):
                r1, r2 = rng.random(dim), rng.random(dim)
                A = 2 * a * r1 - a
                C = 2 * r2
                D = np.abs(C * leader - wolves[i])
                new_pos += leader - A * D
            wolves[i] = np.clip(new_pos / 3, lb, ub)
            # Update the leader hierarchy if the moved wolf improves on it
            f = obj(wolves[i])
            if f < f_alpha:
                alpha, f_alpha = wolves[i].copy(), f
            elif f < f_beta:
                beta, f_beta = wolves[i].copy(), f
            elif f < f_delta:
                delta, f_delta = wolves[i].copy(), f
    return alpha

# Run grey wolf optimization
best_params_gwo = grey_wolf_optimize(objective_function, lb, ub)

# Retrain XGBoost with the best parameters
learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma = best_params_gwo
best_model_gwo = xgb.XGBRegressor(learning_rate=learning_rate,
                                  max_depth=int(max_depth),
                                  min_child_weight=min_child_weight,
                                  subsample=subsample,
                                  colsample_bytree=colsample_bytree,
                                  gamma=gamma,
                                  random_state=42)
best_model_gwo.fit(X_train, y_train)

# Evaluate the model
y_pred_gwo = best_model_gwo.predict(X_test)
r2_gwo = r2_score(y_test, y_pred_gwo)
rmse_gwo = np.sqrt(mean_squared_error(y_test, y_pred_gwo))

print("Grey Wolf Optimization (GWO) XGBoost model evaluation:")
print(f'R² score: {r2_gwo}')
print(f'RMSE: {rmse_gwo}')

# Save the model
best_model_gwo.save_model("xgb_model_gwo.json")
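Since all three scripts use the same random_state=42 split, the saved models can be reloaded and compared on the same held-out test set (a minimal sketch; the file names match those saved above):

# Reload each tuned model and compare on the shared test split
for name, path in [("GA", "xgb_model_ga.json"),
                   ("PSO", "xgb_model_pso.json"),
                   ("GWO", "xgb_model_gwo.json")]:
    model = xgb.XGBRegressor()
    model.load_model(path)
    y_pred = model.predict(X_test)
    print(f"{name}: R² = {r2_score(y_test, y_pred):.4f}, "
          f"RMSE = {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}")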
