光谱特征选择之遗传算法GA

肠胃是一种精神

于 2024-04-19 10:53:51 发布

阅读量1k

点赞数 12

文章标签： python 机器学习

本文链接：https://blog.csdn.net/weixin_42621095/article/details/137957090

版权

import random
import math
from matplotlib import pyplot
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cross_decomposition import PLSRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import cross_val_predict
from pandas import read_csv
from scipy import signal

# Plot configuration: use the SimHei font so the Chinese axis labels and title
# can be rendered, and turn off the Unicode minus sign for tick labels.
pyplot.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Training data: column 0 is used as the response ``y`` and every remaining
# column as a feature in ``x`` (the file is read without a header row).
data = read_csv(r'C:\Users\Lenovo\Desktop\scores\tree leaf N-train-30ge.csv', header=None)
y = data.loc[:, 0]
x = np.array(data.loc[:, 1:])


# Genetic-algorithm hyperparameters.
iteration = 200  # number of generations
pop_size = 30  # number of individuals in the population (30-100)
probability_of_crossover = 0.6  # crossover probability (typically 0.5-0.8)
probability_of_mutation = 0.1  # mutation probability (typically 0.01-0.1)


def initial_population():
    """Create the random starting population.

    Returns:
        An integer array of shape ``(pop_size, x.shape[1])`` whose entries are
        drawn uniformly from {0, 1}. Each row is one individual; column ``k``
        is the gene for column ``k`` of the module-level feature matrix ``x``.
    """
    n_genes = x.shape[1]
    return np.random.randint(2, size=(pop_size, n_genes))


def crossover(pop, probability_of_crossover=0.5):
    """Apply single-point crossover to consecutive pairs of individuals.

    Rows are paired as (0, 1), (2, 3), ... For each pair, with probability
    ``probability_of_crossover``, a cut point is drawn uniformly from
    ``[0, n_genes)`` and the two rows exchange every gene from the cut point
    onwards. A cut point of 0 therefore swaps the two rows entirely.

    The population size and chromosome length are read from ``pop`` itself.
    When the number of rows is odd, the last row has no partner and is left
    unchanged.

    Args:
        pop: 2-D NumPy array of shape (n_individuals, n_genes). It is modified
            in place.
        probability_of_crossover: Chance in [0, 1] that a given pair is crossed.

    Returns:
        The same ``pop`` object, after crossover.
    """
    n_individuals = len(pop)
    if n_individuals < 2:
        return pop
    n_genes = len(pop[0])
    if n_genes == 0:
        return pop

    # Stop at n_individuals - 1 so an unpaired trailing row is never indexed.
    for i in range(0, n_individuals - 1, 2):
        if np.random.rand() < probability_of_crossover:
            cross_point = np.random.randint(low=0, high=n_genes)
            # Copy one tail before overwriting it: rows of a NumPy array are
            # views, so a plain tuple swap would read already-overwritten data.
            tail = np.array(pop[i][cross_point:])
            pop[i][cross_point:] = pop[i + 1][cross_point:]
            pop[i + 1][cross_point:] = tail

    return pop


def mutation(pop, probability_of_mutation=0.1):
    """Flip one randomly chosen gene in randomly chosen individuals.

    Each row is visited once; with probability ``probability_of_mutation`` a
    single position is drawn uniformly from ``[0, n_genes)`` and that gene is
    XOR-ed with 1 (0 becomes 1, 1 becomes 0). At most one gene per individual
    changes in a call.

    The population size and chromosome length are read from ``pop`` itself.

    Args:
        pop: 2-D NumPy integer array of shape (n_individuals, n_genes) holding
            0/1 genes. It is modified in place.
        probability_of_mutation: Chance in [0, 1] that a given individual
            mutates.

    Returns:
        The same ``pop`` object, after mutation.
    """
    n_individuals = len(pop)
    if n_individuals == 0:
        return pop
    n_genes = len(pop[0])
    if n_genes == 0:
        return pop

    for j in range(n_individuals):
        if np.random.rand() < probability_of_mutation:
            mutate_point = np.random.randint(low=0, high=n_genes)
            pop[j][mutate_point] = pop[j][mutate_point] ^ 1

    return pop


def select(pop, fitness):
    """Roulette-wheel selection of the next generation.

    Draws ``len(pop)`` row indices with replacement, each individual being
    chosen with probability proportional to its fitness, and returns the
    corresponding rows.

    Args:
        pop: 2-D NumPy array of shape (n_individuals, n_genes).
        fitness: One non-negative score per row of ``pop`` (array or list).

    Returns:
        A new array with the same shape as ``pop`` containing the selected
        rows (duplicates are expected).
    """
    n_individuals = len(pop)
    fitness = np.asarray(fitness, dtype=float)
    total = fitness.sum()
    # If every score is zero there is nothing to weight by; dividing would
    # produce NaN probabilities, so sample uniformly instead.
    weights = None if total == 0 else fitness / total
    index = np.random.choice(np.arange(n_individuals), size=n_individuals,
                             replace=True, p=weights)
    return pop[index]


# Basic PLS regression model used as the fitness evaluator
def base_pls(x, y, n_components):
    """Score a feature subset with a cross-validated PLS regression.

    Args:
        x: Feature matrix, one row per sample.
        y: Target values, one per sample.
        n_components: Number of PLS components for the model.

    Returns:
        The fitness ``1 / (RMSECV + 1)``, where RMSECV is the root mean squared
        error of the 10-fold cross-validated predictions. It lies in (0, 1]
        and grows as the cross-validation error shrinks.
    """
    pls_simple = PLSRegression(n_components=n_components)
    # No explicit fit on the full data: cross_val_predict fits its own clones
    # of the estimator on each training fold, so a prior fit would be unused.
    y_cv = cross_val_predict(pls_simple, x, y, cv=10)
    rmsecv = np.sqrt(mean_squared_error(y, y_cv))
    fit_value = 1 / (rmsecv + 1)  # fitness value
    return fit_value


# Choose the number of PLS components by cross-validation
def pls_optimise_components(x, y, npc):
    """Return the component count with the lowest 10-fold RMSECV.

    Component counts ``1 .. npc`` are tried, except that the upper end is
    capped at the number of feature columns and at the size of the smallest
    training fold, because a PLS model cannot use more components than either.

    Args:
        x: 2-D feature matrix, one row per sample.
        y: Target values, one per sample.
        npc: Largest number of components to consider.

    Returns:
        The best number of components (1-based).

    Raises:
        ValueError: If no valid component count exists (``npc < 1``, no
            feature columns, or too few samples).
    """
    n_samples, n_features = np.shape(x)
    # With cv=10 the largest held-out fold has ceil(n_samples / 10) samples,
    # so this is the smallest training set any fold is fitted on.
    smallest_train = n_samples - (n_samples + 9) // 10
    # NOTE(review): recent scikit-learn versions raise when n_components
    # exceeds min(n_samples, n_features) of the data being fitted - confirm
    # against the installed version.
    max_comp = min(npc, n_features, smallest_train)
    if max_comp < 1:
        raise ValueError(
            "no valid number of PLS components: npc=%d, n_features=%d, "
            "n_samples=%d" % (npc, n_features, n_samples)
        )

    rmsecv = np.zeros(max_comp)
    for j in range(1, max_comp + 1):
        pls_simple = PLSRegression(n_components=j)
        # cross_val_predict fits clones per fold; no separate fit is needed.
        y_cv = cross_val_predict(pls_simple, x, y, cv=10)
        rmsecv[j - 1] = np.sqrt(mean_squared_error(y, y_cv))
    opt_comp = np.argmin(rmsecv)  # zero-based position of the lowest RMSECV
    return opt_comp + 1


# Evaluate the fitness of every individual in the population
def fitness_judge(pop, max_components=28):
    """Compute one fitness value per individual.

    For each row of ``pop`` the columns of the module-level ``x`` whose gene is
    1 are selected, the number of PLS components is tuned with
    ``pls_optimise_components`` and the subset is scored with ``base_pls``
    (``1 / (RMSECV + 1)``) against the module-level ``y``.

    Args:
        pop: 2-D array of 0/1 genes, shape (n_individuals, n_features).
        max_components: Largest number of PLS components to try per individual.

    Returns:
        A float array of length ``pop.shape[0]``. An individual that selects no
        feature keeps fitness 0, below any score ``base_pls`` can return.
    """
    pop = np.asarray(pop)
    # Size the result from the population actually passed in, so it always
    # matches the loop below.
    fitness_value = np.zeros(pop.shape[0])
    for j, chromosome in enumerate(pop):
        select_x_index = np.where(chromosome > 0.5)[0]  # indices of genes set to 1
        if select_x_index.size == 0:
            # No feature selected: a model cannot be fitted, so leave the
            # fitness at 0 instead of failing inside the PLS fit.
            continue
        x_select = x[:, select_x_index]  # feature columns used for modelling
        n_comp = pls_optimise_components(x_select, y, max_components)
        fitness_value[j] = base_pls(x_select, y, n_components=n_comp)
    return fitness_value


if __name__ == "__main__":
    # One counter per feature: the number of generations in which the best
    # individual had that feature switched on.
    probability = np.zeros(x.shape[1])
    pop = initial_population()  # random starting population
    for i in range(iteration):
        # Use the probabilities configured at the top of the file rather than
        # separate literals, so changing the configuration takes effect.
        pop = crossover(pop, probability_of_crossover=probability_of_crossover)
        pop = mutation(pop, probability_of_mutation=probability_of_mutation)
        # Fitness of every individual in the current population
        fitness_value = fitness_judge(pop)
        fitness_value_max = np.argmax(fitness_value)  # row index of the fittest individual
        fitness_max = np.max(fitness_value)
        print("第%d代最优适应度为%f" % (i+1, fitness_max))
        # Record the best individual BEFORE selection: select() returns a
        # resampled population, after which this row index no longer refers to
        # the individual the fitness value belongs to.
        pop_max = pop[fitness_value_max]
        index = np.where(np.array(pop_max) > 0.5)[0]  # features switched on in the best individual
        probability[index] += 1
        # Roulette-wheel selection of the next generation
        pop = select(pop, fitness_value)

    m = probability  # per-feature selection counts after all generations
    index_max = np.argsort(m)[::-1]  # feature indices, most frequently selected first
    print(index_max)
    x1 = np.arange(x.shape[1])
    y1 = m
    plt.figure(figsize=(12, 8), dpi=100)
    plt.scatter(x1, y1)  # scatter plot of selection count per feature index
    plt.xlabel("波段索引", fontsize=20)
    plt.ylabel("被选中次数", fontsize=20)
    plt.title("遗传算法", fontsize=20)
    plt.show()

迭代次数、种群数目、概率等参数根据情况自己调整