要求:计算各个特征的标准差,取标准差比较小的特征,
然后pca处理,取6-8个特征来预测结果
使用woa-dt模型
B站讲解视频:【鲸鱼优化算法优化DecisionTreeRegressor-Python】 https://www.bilibili.com/video/BV1trYie4EtE/?share_source=copy_web&vd_source=928f967ecbd95c9b874da5eb502e8c69
数据预处理
查看数据,并删除异常值
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold, cross_val_score
from sklearn.tree import DecisionTreeRegressor
# Widen the column display first so describe() shows every feature, then load
# the raw dataset and print summary statistics as a first sanity check.
pd.set_option('display.max_columns', 18)
data = pd.read_csv('F:/待做/项目/woa-df机器学习/python/data.csv')
print(data.describe())
# 基于Z-Score方法的异常值识别
def detect_outliers_zscore(data, threshold=4.5):
    """Return the sorted row indices whose |z-score| exceeds *threshold* in any column.

    Each column is standardized with its own mean and (sample) standard
    deviation; a row is flagged as soon as one of its cells crosses the
    threshold.
    """
    centered = data - data.mean()
    scores = (centered / data.std()).abs()
    flagged_rows = np.where(scores.values > threshold)[0]
    return list(np.unique(flagged_rows))
# Identify outlier rows with the Z-score method (default threshold 4.5).
# NOTE(review): the header above says outliers should be removed, but they are
# only printed here and never dropped from `data` — confirm intent.
outliers = detect_outliers_zscore(data)
print(outliers)  # row indices of the detected outliers
print(len(outliers))
计算各个特征的标准差,取标准差比较小的特征
# Per-feature standard deviations.
# Drop the target column (X17) and the bookkeeping 'index' column first.
features = data.drop(columns=['X17', 'index'])
std_devs = features.std()
print("各个特征的标准差:", std_devs)
# Rank the features from widest to narrowest spread.
std_devs_desc = std_devs.sort_values(ascending=False)
print(std_devs_desc)
pca处理
# PCA: compress the remaining features down to 8 principal components.
# X6/X7 are dropped here — presumably the high-variance features identified
# above (the stated plan keeps the low-variance ones); confirm with the owner.
target = data['X17']
features = features.drop(columns=['X6', 'X7'])
pca = PCA(n_components=8)
x_pca = pca.fit_transform(features)
woa-dt
# Fitness function: mean cross-validated MSE of a decision-tree regressor
# built with the given hyper-parameters (lower is better).
def evaluate(max_depth, min_samples_split, min_samples_leaf, x, y, kf):
    """Return the mean CV mean-squared-error for one hyper-parameter triple.

    The continuous values produced by the optimizer are truncated to ints;
    a falsy max_depth means "unlimited depth" for the tree.
    """
    depth = int(max_depth) if max_depth else None
    model = DecisionTreeRegressor(
        max_depth=depth,
        min_samples_split=int(min_samples_split),
        min_samples_leaf=int(min_samples_leaf),
    )
    neg_mse = cross_val_score(model, x, y, cv=kf, scoring='neg_mean_squared_error')
    return -np.mean(neg_mse)
# Population initialization for the whale optimizer: one row per candidate,
# each coordinate drawn uniformly from its hyper-parameter range.
def initialization_WOA(popsize, dim_limits):
    """Return a (popsize, len(dim_limits)) array of random candidates."""
    return np.array([
        [np.random.uniform(low=lo, high=hi) for (lo, hi) in dim_limits]
        for _ in range(popsize)
    ])
# WOA main loop (Mirjalili & Lewis, 2016), used here to tune the decision tree.
def whale_optimization_algorithm(x, y, pop_size, max_iter, dim_limits, kf):
    """Minimize `evaluate` over the hyper-parameter box with the Whale
    Optimization Algorithm.

    Parameters
    ----------
    x, y : training features / target, passed through to `evaluate`.
    pop_size : number of whales (candidate solutions).
    max_iter : number of optimization iterations.
    dim_limits : list of (low, high) bounds, one per hyper-parameter.
    kf : cross-validation splitter, passed through to `evaluate`.

    Returns
    -------
    (best_fitness, best_position) : lowest mean CV-MSE found and the
    corresponding parameter vector.
    """
    whale_pop = initialization_WOA(pop_size, dim_limits)
    fitness = np.array([evaluate(w[0], w[1], w[2], x, y, kf) for w in whale_pop])

    best_index = np.argmin(fitness)
    best_fitness = fitness[best_index]
    # .copy() is essential: whale_pop rows are mutated in place below, so a
    # bare reference to whale_pop[i] would silently corrupt the best solution.
    best_position = whale_pop[best_index].copy()

    for iteration in range(max_iter):
        a = 2 - iteration * (2 / max_iter)  # linearly decreases from 2 to 0
        leader = whale_pop[np.argmin(fitness)].copy()

        for i in range(pop_size):
            old_position = whale_pop[i].copy()  # kept for greedy revert below
            r1 = np.random.random()
            r2 = np.random.random()
            A = 2 * a * r1 - a
            C = 2 * r2
            p = np.random.random()
            for j in range(whale_pop.shape[1]):
                if p < 0.5:
                    if np.abs(A) > 1:
                        # Exploration: move toward a random whale, per dimension
                        # (the original assigned the whole vector here, mixing
                        # vector and scalar updates within the j-loop).
                        rand_index = np.random.randint(0, pop_size)
                        D_rand = np.abs(C * whale_pop[rand_index][j] - whale_pop[i][j])
                        whale_pop[i][j] = whale_pop[rand_index][j] - A * D_rand
                    else:
                        # Exploitation: encircle the current leader.
                        D_leader = np.abs(C * leader[j] - whale_pop[i][j])
                        whale_pop[i][j] = leader[j] - A * D_leader
                else:
                    # Spiral update with l ~ U(-1, 1) and b = 1, per the
                    # canonical WOA formulation. The original used
                    # exp(a*iteration)*cos(2*pi*iteration): cos(2*pi*k) is
                    # always 1 for integer k and the exp term explodes.
                    l = np.random.uniform(-1, 1)
                    distance_to_leader = np.abs(leader[j] - whale_pop[i][j])
                    whale_pop[i][j] = (
                        distance_to_leader * np.exp(l) * np.cos(2 * np.pi * l) + leader[j]
                    )
                # Keep every coordinate inside the search box.
                whale_pop[i][j] = np.clip(whale_pop[i][j], dim_limits[j][0], dim_limits[j][1])

            new_fitness = evaluate(whale_pop[i][0], whale_pop[i][1], whale_pop[i][2], x, y, kf)
            if new_fitness < fitness[i]:
                fitness[i] = new_fitness
                if new_fitness < best_fitness:
                    best_fitness = new_fitness
                    best_position = whale_pop[i].copy()
            else:
                # Greedy selection: revert to the previous (better) position so
                # the fitness array always describes the actual population.
                whale_pop[i] = old_position
    return best_fitness, best_position
# Decision-tree search space: (max_depth, min_samples_split, min_samples_leaf).
dim_limits = [(1, 200), (2, 40), (1, 40)]
# Run the whale optimizer to find the best decision-tree hyper-parameters.
pop_size, max_iter = 40, 20
kf = KFold(n_splits=10, shuffle=True, random_state=1)  # 10-fold CV for the fitness function
# Fit on the PCA-reduced matrix, not the raw features: the stated plan is to
# predict from the 8 principal components, and x_pca was otherwise unused.
best_fitness, best_params = whale_optimization_algorithm(x_pca, target, pop_size, max_iter, dim_limits, kf)
参考资料:【【智能优化算法】鲸鱼优化算法基础与实战,附原理讲义及实战代码,数学建模国赛美赛必看】 https://www.bilibili.com/video/BV1gU411S7TL/?share_source=copy_web&vd_source=928f967ecbd95c9b874da5eb502e8c69