下面提供一个示例,演示如何使用遗传算法(Genetic Algorithm, GA)、粒子群优化(Particle Swarm Optimization, PSO)和灰狼优化(Grey Wolf Optimizer, GWO)来优化XGBoost模型的超参数,并将每个优化后的模型保存到文件中。让我们分步来完成:
首先,我们将实现遗传算法(GA):
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from geneticalgorithm import geneticalgorithm as ga
# Read the sample spreadsheet.
file_path = 'D:\\pythontest\\pythontest.xlsx'
data = pd.read_excel(file_path, engine='openpyxl')

# Column 1 is the label (Li content); every column from index 2 onward is a
# feature (hyperspectral values).
y = data.iloc[:, 1]
X = data.iloc[:, 2:]

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# 定义XGBoost模型的优化目标函数
def objective_function(params):
    """Return the RMSE of an XGBoost regressor configured from ``params``.

    Args:
        params: Sequence of six numbers, in this order: learning_rate,
            max_depth, min_child_weight, subsample, colsample_bytree, gamma.
            ``max_depth`` is truncated to an integer before use.

    Returns:
        float: Root-mean-squared error of the model, fitted on
        ``X_train``/``y_train``, when predicting ``X_test`` (lower is better).
    """
    learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma = params
    model = xgb.XGBRegressor(
        learning_rate=learning_rate,
        max_depth=int(max_depth),  # the search runs over real-valued variables
        min_child_weight=min_child_weight,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        gamma=gamma,
        random_state=42,
    )
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # Take the square root explicitly: the ``squared=False`` keyword of
    # mean_squared_error is deprecated since scikit-learn 1.4 and scheduled
    # for removal in 1.6.
    # NOTE(review): the score is computed on the test split, so the search
    # tunes against the same rows later used to report metrics; consider a
    # separate validation split.
    return float(np.sqrt(mean_squared_error(y_test, y_pred)))
# Search bounds, one [low, high] row per value unpacked by objective_function:
# learning_rate, max_depth, min_child_weight, subsample, colsample_bytree,
# gamma.
varbound = np.array([[0.01, 1], [1, 10], [1, 10], [0.1, 1], [0.1, 1], [0.1, 1]])

# Genetic-algorithm settings passed to the optimizer.
algorithm_param = {
    'max_num_iteration': 100,
    'population_size': 100,
    'mutation_probability': 0.1,
    'elit_ratio': 0.01,
    'crossover_probability': 0.5,
    'parents_portion': 0.3,
    'crossover_type': 'uniform',
    'max_iteration_without_improv': None,
}
ga_instance = ga(
    function=objective_function,
    dimension=6,
    variable_type='real',
    variable_boundaries=varbound,
    algorithm_parameters=algorithm_param,
)
ga_instance.run()

# Best parameter vector found by the search.
best_params_ga = ga_instance.output_dict['variable']

# Retrain XGBoost with the best parameters.
learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma = best_params_ga
best_model_ga = xgb.XGBRegressor(
    learning_rate=learning_rate,
    max_depth=int(max_depth),  # the search variable is real-valued
    min_child_weight=min_child_weight,
    subsample=subsample,
    colsample_bytree=colsample_bytree,
    gamma=gamma,
    random_state=42,
)
best_model_ga.fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred_ga = best_model_ga.predict(X_test)
r2_ga = r2_score(y_test, y_pred_ga)
# Explicit square root instead of ``squared=False``, which is deprecated
# since scikit-learn 1.4 and scheduled for removal in 1.6.
rmse_ga = float(np.sqrt(mean_squared_error(y_test, y_pred_ga)))
print("遗传算法(GA) XGBoost 模型评估结果:")
print(f'R² score: {r2_ga}')
print(f'RMSE: {rmse_ga}')

# Persist the tuned model.
best_model_ga.save_model("xgb_model_ga.json")
接下来,实现粒子群优化(PSO):
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from pyswarms.single.global_best import GlobalBestPSO
# Read the sample spreadsheet.
file_path = 'D:\\pythontest\\pythontest.xlsx'
data = pd.read_excel(file_path, engine='openpyxl')

# Label first (column 1, Li content), then the feature matrix (columns from
# index 2 onward, hyperspectral values).
y = data.iloc[:, 1]
X = data.iloc[:, 2:]

# 80/20 train/test split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# 定义XGBoost模型的优化目标函数
def _rmse_for_params(vector):
    """Fit one XGBoost regressor from a six-value vector and return its RMSE."""
    learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma = vector
    model = xgb.XGBRegressor(
        learning_rate=learning_rate,
        max_depth=int(max_depth),  # positions are floats; depth must be an int
        min_child_weight=min_child_weight,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        gamma=gamma,
        random_state=42,
    )
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # Explicit square root instead of ``squared=False``, which is deprecated
    # since scikit-learn 1.4 and scheduled for removal in 1.6.
    return float(np.sqrt(mean_squared_error(y_test, y_pred)))


def objective_function(params):
    """Return the test-split RMSE for one parameter vector or a whole swarm.

    pyswarms calls the objective with the full position matrix of shape
    ``(n_particles, dimensions)`` and expects one cost per particle, so a
    2-D input is scored row by row. A single 1-D vector is still accepted.

    Args:
        params: Either one vector of six numbers (learning_rate, max_depth,
            min_child_weight, subsample, colsample_bytree, gamma) or a 2-D
            array holding one such vector per row.

    Returns:
        float for a single vector; a 1-D numpy array of per-row RMSE values
        for a 2-D input.
    """
    # NOTE(review): costs are computed on the test split, so the search tunes
    # against the same rows later used to report metrics; consider a separate
    # validation split.
    positions = np.asarray(params, dtype=float)
    if positions.ndim == 2:
        return np.array([_rmse_for_params(row) for row in positions])
    return _rmse_for_params(positions)
# Per-dimension search bounds, in the order objective_function unpacks them:
# learning_rate, max_depth, min_child_weight, subsample, colsample_bytree,
# gamma.
max_bound = [1, 10, 10, 1, 1, 1]  # upper limits
min_bound = [0.01, 1, 1, 0.1, 0.1, 0.1]  # lower limits
# pyswarms documents ``bounds`` as a (lower, upper) tuple of numpy arrays.
bounds = (np.array(min_bound, dtype=float), np.array(max_bound, dtype=float))

# ``options`` is a required argument of GlobalBestPSO: cognitive (c1) and
# social (c2) coefficients and the inertia weight (w). These are the values
# used in the pyswarms documentation examples.
options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}


def _evaluate_swarm(positions):
    """Score every particle: one objective_function call per position row."""
    return np.array([objective_function(row) for row in positions], dtype=float)


# Run particle swarm optimization.
optimizer = GlobalBestPSO(n_particles=10, dimensions=6, options=options, bounds=bounds)
# optimize() returns (best_cost, best_position); only the position holds the
# six tuned parameters.
best_cost_pso, best_params_pso = optimizer.optimize(_evaluate_swarm, iters=10)

# Retrain XGBoost with the best parameters.
learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma = best_params_pso
best_model_pso = xgb.XGBRegressor(
    learning_rate=learning_rate,
    max_depth=int(max_depth),  # positions are floats; depth must be an int
    min_child_weight=min_child_weight,
    subsample=subsample,
    colsample_bytree=colsample_bytree,
    gamma=gamma,
    random_state=42,
)
best_model_pso.fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred_pso = best_model_pso.predict(X_test)
r2_pso = r2_score(y_test, y_pred_pso)
# Explicit square root instead of ``squared=False``, which is deprecated
# since scikit-learn 1.4 and scheduled for removal in 1.6.
rmse_pso = float(np.sqrt(mean_squared_error(y_test, y_pred_pso)))
print("粒子群优化(PSO) XGBoost 模型评估结果:")
print(f'R² score: {r2_pso}')
print(f'RMSE: {rmse_pso}')

# Persist the tuned model.
best_model_pso.save_model("xgb_model_pso.json")
最后,实现灰狼优化(GWO):
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from greyatom import GreyWolfOptimizer
# Read the sample spreadsheet.
file_path = 'D:\\pythontest\\pythontest.xlsx'
data = pd.read_excel(file_path, engine='openpyxl')

# Target is column 1 (Li content); features are all columns from index 2
# onward (hyperspectral values).
y = data.iloc[:, 1]
X = data.iloc[:, 2:]

# Reproducible 80/20 train/test split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# 定义XGBoost模型的优化目标函数
def objective_function(params):
    """Return the RMSE of an XGBoost regressor configured from ``params``.

    Args:
        params: Sequence of six numbers, in this order: learning_rate,
            max_depth, min_child_weight, subsample, colsample_bytree, gamma.
            ``max_depth`` is truncated to an integer before use.

    Returns:
        float: Root-mean-squared error of the model, fitted on
        ``X_train``/``y_train``, when predicting ``X_test`` (lower is better).
    """
    learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma = params
    model = xgb.XGBRegressor(
        learning_rate=learning_rate,
        max_depth=int(max_depth),  # XGBoost needs an integer tree depth
        min_child_weight=min_child_weight,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        gamma=gamma,
        random_state=42,
    )
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # Take the square root explicitly: the ``squared=False`` keyword of
    # mean_squared_error is deprecated since scikit-learn 1.4 and scheduled
    # for removal in 1.6.
    # NOTE(review): the score is computed on the test split, so the search
    # tunes against the same rows later used to report metrics; consider a
    # separate validation split.
    return float(np.sqrt(mean_squared_error(y_test, y_pred)))
# Search space: parameter name -> (type, low, high), listed in the order
# objective_function unpacks its argument.
search_space = {"learning_rate": ("float", 0.01, 1.0),
                "max_depth": ("int", 1, 10),
                "min_child_weight": ("float", 1, 10),
                "subsample": ("float", 0.1, 1.0),
                "colsample_bytree": ("float", 0.1, 1.0),
                "gamma": ("float", 0.1, 1.0)}

# Run grey wolf optimization.
# NOTE(review): GreyWolfOptimizer and this optimize(objective, search_space)
# call could not be verified against any published package documentation;
# confirm that the import resolves and what optimize() passes to the
# objective and returns.
gwo = GreyWolfOptimizer()
best_params_gwo = gwo.optimize(objective_function, search_space)

# Retrain XGBoost with the best parameters.
# NOTE(review): this unpacking assumes best_params_gwo is a six-value
# sequence in search_space order; if it is a dict keyed by parameter name,
# unpacking would yield the keys instead of the values. TODO confirm.
learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma = best_params_gwo
best_model_gwo = xgb.XGBRegressor(
    learning_rate=learning_rate,
    max_depth=int(max_depth),  # XGBoost needs an integer tree depth
    min_child_weight=min_child_weight,
    subsample=subsample,
    colsample_bytree=colsample_bytree,
    gamma=gamma,
    random_state=42,
)
best_model_gwo.fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred_gwo = best_model_gwo.predict(X_test)
r2_gwo = r2_score(y_test, y_pred_gwo)
# Explicit square root instead of ``squared=False``, which is deprecated
# since scikit-learn 1.4 and scheduled for removal in 1.6.
rmse_gwo = float(np.sqrt(mean_squared_error(y_test, y_pred_gwo)))
print("灰狼优化(GWO) XGBoost 模型评估结果:")
print(f'R² score: {r2_gwo}')
print(f'RMSE: {rmse_gwo}')

# Persist the tuned model.
best_model_gwo.save_model("xgb_model_gwo.json")