# Spectral feature selection with the siPLS algorithm (光谱特征选择之sipls算法)
import math
from itertools import combinations

import numpy as np
from pandas import read_csv


def splitspectrum(interval_num, x):
    """Split the columns of ``x`` into ``interval_num`` contiguous intervals.

    When the number of columns is not divisible by ``interval_num``, the
    leading intervals get ``n_columns // interval_num`` columns each and the
    last ``n_columns % interval_num`` intervals get one extra column, so
    every column belongs to exactly one interval.

    Parameters
    ----------
    interval_num : int
        Number of intervals to create; must be at least 1.
    x : numpy.ndarray
        2-D array that is sliced along its second axis.

    Returns
    -------
    dict
        Maps the 1-based interval number, as a string (``"1"``, ``"2"``,
        ...), to the corresponding column slice of ``x``.

    Raises
    ------
    ValueError
        If ``interval_num`` is smaller than 1.
    """
    if interval_num < 1:
        raise ValueError("interval_num must be a positive integer")

    feature_num = x.shape[1]
    base_size, remaining = divmod(feature_num, interval_num)
    # Narrow intervals come first; the trailing `remaining` intervals are one
    # column wider so the sizes add up to feature_num.
    sizes = [base_size] * (interval_num - remaining) + [base_size + 1] * remaining

    x_block = {}
    start = 0
    for number, size in enumerate(sizes, start=1):
        x_block[str(number)] = x[:, start:start + size]
        start += size
    return x_block


def _plot_rmsecv(rmsecv):
    """Show a bar chart with one bar per evaluated interval pair."""
    # Imported here so the rest of the module does not need a plotting
    # backend until a figure is actually drawn.
    import matplotlib.pyplot as plt

    plt.figure(figsize=(6, 4), dpi=300)
    # Bar positions follow the number of results instead of a fixed 1..45,
    # which was only correct for exactly 10 intervals.
    positions = np.arange(1, len(rmsecv) + 1)
    plt.bar(positions, rmsecv, width=0.5, linewidth=0.4)
    plt.xlabel("intervals")
    plt.ylabel("rmsecv")
    plt.show()


def sipls(intervals, x, y, max_components=29):
    """Score every pair of intervals with 10-fold cross-validated PLS.

    ``x`` is split with :func:`splitspectrum`; for each unordered pair of
    intervals the two column blocks are joined, PLS models with 1 up to
    ``max_components`` components are cross-validated, and the smallest
    RMSECV of that pair is kept. The results are printed and shown as a bar
    chart.

    Parameters
    ----------
    intervals : int
        Number of intervals to split the columns of ``x`` into.
    x : numpy.ndarray
        2-D predictor array.
    y : array-like
        Response values, one per row of ``x``.
    max_components : int, optional
        Largest number of PLS components to try (default 29). It is capped
        at the number of columns of each interval pair, because
        ``PLSRegression`` does not accept more components than features.

    Returns
    -------
    list
        Best RMSECV of each interval pair, ordered (1, 2), (1, 3), ...,
        (intervals - 1, intervals).

    Raises
    ------
    ValueError
        If an interval pair leaves no valid component count to evaluate.
    """
    # scikit-learn is imported at the point of use so that splitspectrum()
    # stays importable without the modelling stack.
    from sklearn.cross_decomposition import PLSRegression
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import cross_val_predict

    x_block = splitspectrum(intervals, x)
    rmsecv = []
    for i, j in combinations(range(1, intervals + 1), 2):
        # Progress message: "current intervals: i, j".
        print('当前区间为:%d,%d' % (i, j))
        x_interval = np.append(x_block[str(i)], x_block[str(j)], axis=1)

        component_limit = min(max_components, x_interval.shape[1])
        if component_limit < 1:
            raise ValueError(
                "no PLS component count to evaluate for intervals %d and %d"
                % (i, j)
            )

        error = []
        for component in range(1, component_limit + 1):
            # cross_val_predict fits a clone of the estimator on every fold,
            # so no separate fit on the full data is needed.
            pls = PLSRegression(n_components=component)
            y_cv = cross_val_predict(pls, x_interval, y, cv=10)
            error.append(math.sqrt(mean_squared_error(y, y_cv)))
        rmsecv.append(np.min(error))

    print(rmsecv)
    _plot_rmsecv(rmsecv)
    return rmsecv


def main():
    """Load the CSV file and run siPLS on it with 10 intervals."""
    # Raw string: the Windows path contains backslashes that must not be
    # read as escape sequences.
    data = read_csv(r'E:\python data\peach_spectra_brix.csv', header=None)
    x = np.array(data.loc[:, 1:])  # every column except the first
    y = data.loc[:, 0]  # first column is used as the response
    # Adjust the number of intervals to suit your needs.
    sipls(10, x, y)


if __name__ == "__main__":
    main()