"""Genetic-algorithm variable (band) selection scored by cross-validated PLS.

Each individual is a 0/1 mask over the columns of ``x``. An individual's
fitness is ``1 / (RMSECV + 1)`` of a PLS regression fitted on the masked
columns, where RMSECV comes from 10-fold cross-validation. After the run,
the script plots how often each column was part of the best individual of a
generation.
"""

import math
import random

from matplotlib import pyplot
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cross_decomposition import PLSRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import cross_val_predict
from pandas import read_csv
from scipy import signal

# Use a font that can render the Chinese axis labels and title.
pyplot.rcParams['font.sans-serif'] = ['SimHei']
# Draw minus signs with the ASCII hyphen so they render with that font.
pyplot.rcParams['axes.unicode_minus'] = False

# Column 0 of the CSV is the response; the remaining columns are predictors.
data = read_csv(r'C:\Users\Lenovo\Desktop\scores\tree leaf N-train-30ge.csv', header=None)
x = np.array(data.loc[:, 1:])
y = data.loc[:, 0]

iteration = 200  # number of generations
pop_size = 30  # population size (typically 30-100)
probability_of_crossover = 0.6  # crossover probability (typically 0.5-0.8)
probability_of_mutation = 0.1  # mutation probability (typically 0.01-0.1)

_CV_FOLDS = 10  # folds used for every cross-validation in this script
_MAX_COMPONENTS = 28  # upper limit on PLS components tried per individual


def initial_population():
    """Return a random 0/1 population of shape ``(pop_size, x.shape[1])``."""
    return np.random.randint(2, size=(pop_size, x.shape[1]))


def crossover(pop, probability_of_crossover=0.5):
    """Apply single-point crossover to consecutive pairs (0-1, 2-3, ...).

    Args:
        pop: 2-D integer ndarray, one individual per row. Modified in place.
        probability_of_crossover: chance that a given pair is crossed.

    Returns:
        The same ``pop`` array, after crossover.

    With an odd number of individuals the last one has no partner and is
    left unchanged instead of raising ``IndexError``.
    """
    n_individuals, n_genes = pop.shape
    for i in range(0, n_individuals - 1, 2):
        if np.random.rand() < probability_of_crossover:
            cross_point = np.random.randint(low=0, high=n_genes)
            # Swap the tails; copy one side first so it is not overwritten.
            tail = pop[i, cross_point:].copy()
            pop[i, cross_point:] = pop[i + 1, cross_point:]
            pop[i + 1, cross_point:] = tail
    return pop


def mutation(pop, probability_of_mutation=0.1):
    """Flip one randomly chosen gene in each individual that mutates.

    Args:
        pop: 2-D integer ndarray, one individual per row. Modified in place.
        probability_of_mutation: chance that a given individual mutates.

    Returns:
        The same ``pop`` array, after mutation.
    """
    n_individuals, n_genes = pop.shape
    for j in range(n_individuals):
        if np.random.rand() < probability_of_mutation:
            mutate_point = np.random.randint(low=0, high=n_genes)
            pop[j, mutate_point] ^= 1  # XOR with 1 turns 0 into 1 and 1 into 0
    return pop


def select(pop, fitness):
    """Resample the population by roulette-wheel (fitness-proportional) choice.

    Args:
        pop: 2-D ndarray, one individual per row.
        fitness: 1-D ndarray of non-negative fitness values, one per row.

    Returns:
        A new array with as many rows as ``pop``, drawn with replacement.
    """
    n_individuals = pop.shape[0]
    total = fitness.sum()
    # A zero or non-finite total cannot be normalised; sample uniformly then.
    weights = fitness / total if np.isfinite(total) and total > 0 else None
    index = np.random.choice(
        np.arange(n_individuals), size=n_individuals, replace=True, p=weights
    )
    return pop[index]


def _rmsecv(x, y, n_components):
    """Return the cross-validated RMSE of a PLS model with ``n_components``."""
    # cross_val_predict fits a clone of the estimator on each training fold,
    # so fitting it on the full data beforehand would be wasted work.
    model = PLSRegression(n_components=n_components)
    y_cv = cross_val_predict(model, x, y, cv=_CV_FOLDS)
    return np.sqrt(mean_squared_error(y, y_cv))


def _max_components(x, npc):
    """Return the largest component count usable in every training fold."""
    n_samples, n_features = x.shape
    # The biggest held-out fold has ceil(n / k) samples, which leaves the
    # smallest training fold with the remainder.
    smallest_train_fold = n_samples - math.ceil(n_samples / _CV_FOLDS)
    return min(npc, n_features, smallest_train_fold)


def base_pls(x, y, n_components):
    """Return the fitness ``1 / (RMSECV + 1)`` of a basic PLS model.

    Args:
        x: predictor matrix, samples in rows.
        y: response values.
        n_components: number of PLS latent variables.
    """
    return 1 / (_rmsecv(x, y, n_components) + 1)


def pls_optimise_components(x, y, npc):
    """Pick the number of PLS components with the lowest RMSECV.

    Args:
        x: predictor matrix, samples in rows.
        y: response values.
        npc: largest number of components to try. It is capped at the number
            of columns of ``x`` and at the smallest training-fold size,
            because recent scikit-learn versions reject larger values.

    Returns:
        The best component count, between 1 and the capped ``npc``.

    Raises:
        ValueError: if no component count is feasible for ``x``.
    """
    max_comp = _max_components(x, npc)
    if max_comp < 1:
        raise ValueError("x has too few samples or features for PLS cross-validation")
    rmsecv = np.array([_rmsecv(x, y, j) for j in range(1, max_comp + 1)])
    return int(np.argmin(rmsecv)) + 1


def fitness_judge(pop):
    """Return the fitness of every individual in ``pop``.

    Each row is a 0/1 mask over the columns of the module-level ``x``. An
    individual that selects no column cannot be modelled and gets fitness 0.
    """
    fitness_value = np.zeros(pop.shape[0])
    for j, chromosome in enumerate(pop):
        selected = np.flatnonzero(np.asarray(chromosome) > 0.5)
        if selected.size == 0:
            continue
        x_select = x[:, selected]
        n_comp = pls_optimise_components(x_select, y, _MAX_COMPONENTS)
        fitness_value[j] = base_pls(x_select, y, n_components=n_comp)
    return fitness_value


def main():
    """Run the genetic algorithm and plot the per-band selection counts."""
    selection_counts = np.zeros(x.shape[1])
    pop = initial_population()
    for generation in range(iteration):
        # Use the probabilities configured at the top of the file.
        pop = crossover(pop, probability_of_crossover=probability_of_crossover)
        pop = mutation(pop, probability_of_mutation=probability_of_mutation)
        fitness_value = fitness_judge(pop)

        # Record the best individual BEFORE selection: select() resamples the
        # rows, after which indices into fitness_value no longer match pop.
        best = int(np.argmax(fitness_value))
        print("第%d代最优适应度为%f" % (generation + 1, fitness_value[best]))
        selection_counts[np.flatnonzero(pop[best] > 0.5)] += 1

        pop = select(pop, fitness_value)

    # Band indices ordered from most to least frequently selected.
    index_max = np.argsort(selection_counts)[::-1]
    print(index_max)

    plt.figure(figsize=(12, 8), dpi=100)
    plt.scatter(np.arange(x.shape[1]), selection_counts)
    plt.xlabel("波段索引", fontsize=20)
    plt.ylabel("被选中次数", fontsize=20)
    plt.title("遗传算法", fontsize=20)
    plt.show()


if __name__ == "__main__":
    main()
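# Note: adjust the number of iterations, the population size, and the crossover/mutation probabilities to suit your own data.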