# Spectral feature selection with the UVE (uninformative variable elimination) algorithm
import warnings

import matplotlib.pyplot as plt
import numpy as np
from pandas import read_csv
from scipy import signal  # not used below; kept because other code may rely on it
from sklearn.cross_decomposition import PLSRegression

warnings.filterwarnings("ignore")

# Data import: column 0 is used as the regression target, the remaining
# columns as the spectra matrix (one row per sample).
data = read_csv(r'C:\Users\Lenovo\Desktop\scores\tree leaf N-train-30ge.csv', header=None)
x = np.array(data.loc[:, 1:])
y = np.array(data.loc[:, 0])

# Plotting axis: one evenly spaced value between 866 and 1701 per spectral column.
wl = np.linspace(866, 1701, x.shape[1])


def uve(m, n, n_components=25, noise_scale=0.01):
    """Select spectral variables by uninformative variable elimination (UVE).

    The module-level spectra matrix ``x`` is augmented with ``n`` columns of
    Gaussian noise. A PLS model is fitted ``m`` times, each time leaving one
    sample out, and for every column the stability ``h = mean / std`` of its
    regression coefficient across those fits is computed. A real variable is
    kept when ``|h|`` is at least as large as the largest ``|h|`` reached by
    any noise column.

    The function reads the module-level arrays ``x`` and ``y`` and prints the
    selected indices and the shape of the reduced spectra matrix.

    :param m: number of calibration samples (rows of ``x``)
    :param n: number of spectral features (columns of ``x``)
    :param n_components: number of PLS components used in every fit
    :param noise_scale: standard deviation of the artificial noise columns
    :return: list of selected column indices into ``x``
    :raises ValueError: if ``(m, n)`` does not match the shape of ``x``
    """
    # A wrong n would silently mix real and noise columns in the h[n:] slice
    # below, so reject mismatches up front instead of returning bad indices.
    if x.shape != (m, n):
        raise ValueError(
            "uve(m={}, n={}) does not match the spectra matrix of shape {}".format(
                m, n, x.shape
            )
        )

    # Build an (m, n) noise matrix and append it to the right of the spectra.
    noise = np.random.normal(loc=0, scale=noise_scale, size=(m, n))
    augmented = np.concatenate((x, noise), axis=1)

    # Leave-one-out cross-validation: row i holds the 2n PLS regression
    # coefficients obtained with sample i removed, giving an (m, 2n) matrix.
    coefs = np.zeros((m, 2 * n))
    for i in range(m):
        x_train = np.delete(augmented, i, axis=0)
        y_train = np.delete(y, i, axis=0)
        pls = PLSRegression(n_components=n_components)
        pls.fit(x_train, y_train)
        # ravel() flattens coef_ regardless of its orientation.
        coefs[i, :] = np.ravel(pls.coef_)

    # Column-wise stability h(i) = mean(i) / std(i).
    # NOTE(review): a column with zero std yields inf/NaN here; warnings are
    # globally silenced above, so this would pass unnoticed.
    h = coefs.mean(axis=0) / coefs.std(axis=0)

    # Threshold: the largest |h| among the noise columns.
    h_max = np.max(np.abs(h[n:]))

    # Keep the real variables whose |h| is at least the noise threshold.
    selected_wave = np.flatnonzero(np.abs(h[:n]) >= h_max).tolist()
    print(selected_wave)
    print(x[:, selected_wave].shape)
    return selected_wave


selected_wave1 = uve(x.shape[0], x.shape[1])

# Plot the third spectrum and mark the selected variables on it.
plt.figure(figsize=(12, 8), dpi=100)
plt.plot(wl, x[2, :].T)
plt.scatter(wl[selected_wave1], x[2, :][selected_wave1], marker='*', color='red')
plt.show()