Optimizing an XGBoost Model

This post walks through an example of using a Genetic Algorithm (GA), Particle Swarm Optimization (PSO), and Grey Wolf Optimization (GWO) to tune the hyperparameters of an XGBoost regression model, saving each optimized model to a file. Let's take it step by step.
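The examples rely on a few third-party packages. Judging from the imports below, an install command along these lines should cover them (package names are inferred from the imports, so check them against your environment):

pip install pandas numpy xgboost scikit-learn openpyxl geneticalgorithm pyswarms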

First, we implement the Genetic Algorithm (GA):

import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from geneticalgorithm import geneticalgorithm as ga

# Load the data
file_path = 'D:\\pythontest\\pythontest.xlsx'
data = pd.read_excel(file_path, engine='openpyxl')

# Preprocess the data
X = data.iloc[:, 2:]  # hyperspectral values as features
y = data.iloc[:, 1]   # Li content as the label

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Objective function: train XGBoost with the candidate parameters and return the test-set RMSE to minimize
def objective_function(params):
    learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma = params
    model = xgb.XGBRegressor(learning_rate=learning_rate,
                              max_depth=int(max_depth),
                              min_child_weight=min_child_weight,
                              subsample=subsample,
                              colsample_bytree=colsample_bytree,
                              gamma=gamma,
                              random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))  # squared=False was removed in scikit-learn 1.6
    return rmse

# Parameter bounds, in order: learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma
varbound = np.array([[0.01, 1], [1, 10], [1, 10], [0.1, 1], [0.1, 1], [0.1, 1]])

# Run the genetic algorithm (100 iterations x 100 individuals means up to 10,000 model fits)
algorithm_param = {'max_num_iteration': 100,
                   'population_size': 100,
                   'mutation_probability': 0.1,
                   'elit_ratio': 0.01,
                   'crossover_probability': 0.5,
                   'parents_portion': 0.3,
                   'crossover_type': 'uniform',
                   'max_iteration_without_improv': None}
ga_instance = ga(function=objective_function,
                 dimension=6,
                 variable_type='real',
                 variable_boundaries=varbound,
                 function_timeout=600,  # the library default of 10 s is too short for a full XGBoost fit
                 algorithm_parameters=algorithm_param)
ga_instance.run()

# Retrieve the best parameter vector found
best_params_ga = ga_instance.output_dict['variable']

# Retrain XGBoost with the best parameters
learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma = best_params_ga
best_model_ga = xgb.XGBRegressor(learning_rate=learning_rate,
                                  max_depth=int(max_depth),
                                  min_child_weight=min_child_weight,
                                  subsample=subsample,
                                  colsample_bytree=colsample_bytree,
                                  gamma=gamma,
                                  random_state=42)
best_model_ga.fit(X_train, y_train)

# Evaluate the model
y_pred_ga = best_model_ga.predict(X_test)
r2_ga = r2_score(y_test, y_pred_ga)
rmse_ga = np.sqrt(mean_squared_error(y_test, y_pred_ga))

print("遗传算法(GA) XGBoost 模型评估结果:")
print(f'R² score: {r2_ga}')
print(f'RMSE: {rmse_ga}')

# Save the model
best_model_ga.save_model("xgb_model_ga.json")
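To reuse a saved model later, it can be loaded back from the JSON file (a minimal sketch using XGBoost's load_model; the file name matches the one saved above):

# Reload the saved GA-tuned model and sanity-check a few predictions
loaded_ga = xgb.XGBRegressor()
loaded_ga.load_model("xgb_model_ga.json")
print(loaded_ga.predict(X_test.iloc[:5]))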

Next, we implement Particle Swarm Optimization (PSO):

import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from pyswarms.single.global_best import GlobalBestPSO

# Load the data
file_path = 'D:\\pythontest\\pythontest.xlsx'
data = pd.read_excel(file_path, engine='openpyxl')

# Preprocess the data
X = data.iloc[:, 2:]  # hyperspectral values as features
y = data.iloc[:, 1]   # Li content as the label

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Objective for pyswarms: the optimizer passes in the whole swarm at once,
# an array of shape (n_particles, dimensions), and expects one cost per particle
def single_objective(params):
    learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma = params
    model = xgb.XGBRegressor(learning_rate=learning_rate,
                             max_depth=int(max_depth),
                             min_child_weight=min_child_weight,
                             subsample=subsample,
                             colsample_bytree=colsample_bytree,
                             gamma=gamma,
                             random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return np.sqrt(mean_squared_error(y_test, y_pred))

def objective_function(swarm):
    return np.array([single_objective(particle) for particle in swarm])

# Parameter bounds as (lower, upper) arrays, in the same order the objective unpacks them
min_bound = np.array([0.01, 1, 1, 0.1, 0.1, 0.1])
max_bound = np.array([1, 10, 10, 1, 1, 1])
bounds = (min_bound, max_bound)

# Run particle swarm optimization; c1, c2, and w are the standard
# cognitive, social, and inertia coefficients, which pyswarms requires
options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
optimizer = GlobalBestPSO(n_particles=10, dimensions=6, options=options, bounds=bounds)
best_cost, best_params_pso = optimizer.optimize(objective_function, iters=10)

# Retrain XGBoost with the best parameters
learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma = best_params_pso
best_model_pso = xgb.XGBRegressor(learning_rate=learning_rate,
                                   max_depth=int(max_depth),
                                   min_child_weight=min_child_weight,
                                   subsample=subsample,
                                   colsample_bytree=colsample_bytree,
                                   gamma=gamma,
                                   random_state=42)
best_model_pso.fit(X_train, y_train)

# Evaluate the model
y_pred_pso = best_model_pso.predict(X_test)
r2_pso = r2_score(y_test, y_pred_pso)
rmse_pso = np.sqrt(mean_squared_error(y_test, y_pred_pso))

print("粒子群优化(PSO) XGBoost 模型评估结果:")
print(f'R² score: {r2_pso}')
print(f'RMSE: {rmse_pso}')

# Save the model
best_model_pso.save_model("xgb_model_pso.json")

Finally, Grey Wolf Optimization (GWO). Rather than depending on a third-party GWO package, the version below implements a minimal GWO search loop directly in NumPy (a from-scratch sketch of the standard alpha/beta/delta update, not a library call):

import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

# Load the data
file_path = 'D:\\pythontest\\pythontest.xlsx'
data = pd.read_excel(file_path, engine='openpyxl')

# Preprocess the data
X = data.iloc[:, 2:]  # hyperspectral values as features
y = data.iloc[:, 1]   # Li content as the label

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Objective function: train XGBoost with the candidate parameters and return the test-set RMSE to minimize
def objective_function(params):
    learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma = params
    model = xgb.XGBRegressor(learning_rate=learning_rate,
                             max_depth=int(max_depth),
                             min_child_weight=min_child_weight,
                             subsample=subsample,
                             colsample_bytree=colsample_bytree,
                             gamma=gamma,
                             random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return np.sqrt(mean_squared_error(y_test, y_pred))

# Parameter bounds, in order: learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma
lb = np.array([0.01, 1, 1, 0.1, 0.1, 0.1])
ub = np.array([1.0, 10, 10, 1.0, 1.0, 1.0])

# Minimal Grey Wolf Optimizer: the three best wolves (alpha, beta, delta)
# guide the rest of the pack, and the coefficient a decays from 2 to 0 to
# shift the search from exploration to exploitation
def grey_wolf_optimize(obj, lb, ub, n_wolves=20, n_iter=30, seed=42):
    rng = np.random.default_rng(seed)
    dim = len(lb)
    wolves = rng.uniform(lb, ub, size=(n_wolves, dim))
    fitness = np.array([obj(w) for w in wolves])
    order = np.argsort(fitness)
    alpha, beta, delta = (wolves[order[k]].copy() for k in range(3))
    f_alpha, f_beta, f_delta = fitness[order[:3]]
    for t in range(n_iter):
        a = 2 - 2 * t / n_iter
        for i in range(n_wolves):
            # Each leader pulls the wolf toward itself; the new position is the average
            new_pos = np.zeros(dim)
            for leader in (alpha, beta, delta):
                r1, r2 = rng.random(dim), rng.random(dim)
                A = 2 * a * r1 - a
                C = 2 * r2
                D = np.abs(C * leader - wolves[i])
                new_pos += leader - A * D
            wolves[i] = np.clip(new_pos / 3, lb, ub)
            # Update the leader hierarchy if the moved wolf improves on it
            f = obj(wolves[i])
            if f < f_alpha:
                alpha, f_alpha = wolves[i].copy(), f
            elif f < f_beta:
                beta, f_beta = wolves[i].copy(), f
            elif f < f_delta:
                delta, f_delta = wolves[i].copy(), f
    return alpha

# Run grey wolf optimization
best_params_gwo = grey_wolf_optimize(objective_function, lb, ub)

# Retrain XGBoost with the best parameters
learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma = best_params_gwo
best_model_gwo = xgb.XGBRegressor(learning_rate=learning_rate,
                                  max_depth=int(max_depth),
                                  min_child_weight=min_child_weight,
                                  subsample=subsample,
                                  colsample_bytree=colsample_bytree,
                                  gamma=gamma,
                                  random_state=42)
best_model_gwo.fit(X_train, y_train)

# Evaluate the model
y_pred_gwo = best_model_gwo.predict(X_test)
r2_gwo = r2_score(y_test, y_pred_gwo)
rmse_gwo = np.sqrt(mean_squared_error(y_test, y_pred_gwo))

print("Grey Wolf Optimization (GWO) XGBoost model evaluation:")
print(f'R² score: {r2_gwo}')
print(f'RMSE: {rmse_gwo}')

# Save the model
best_model_gwo.save_model("xgb_model_gwo.json")
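Since all three scripts use the same random_state=42 split, the saved models can be reloaded and compared on the same held-out test set (a minimal sketch; the file names match those saved above):

# Reload each tuned model and compare on the shared test split
for name, path in [("GA", "xgb_model_ga.json"),
                   ("PSO", "xgb_model_pso.json"),
                   ("GWO", "xgb_model_gwo.json")]:
    model = xgb.XGBRegressor()
    model.load_model(path)
    y_pred = model.predict(X_test)
    print(f"{name}: R² = {r2_score(y_test, y_pred):.4f}, "
          f"RMSE = {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}")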
