光谱特征选择之模拟退火算法SA
import numpy as np import matplotlib.pyplot as plt import matplotlib.collections as collections import pandas as pd from scipy import interpolate from scipy.signal import savgol_filter from sys import stdout from sklearn.cross_decomposition import PLSRegression from sklearn.metrics import mean_squared_error, r2_score from sklearn.model_selection import cross_val_predict from pandas import read_csv data = read_csv('E:\python data\peach_spectra_brix.csv',header = None) # wl = np.arange(730,1100.5,0.5) X = np.array(data.loc[:,1:]) y = data.loc[:,0] def base_pls(X,y,n_components, return_model=False): # Simple PLS pls_simple = PLSRegression(n_components=n_components) # Fit pls_simple.fit(X, y) # Cross-validation y_cv = cross_val_predict(pls_simple, X, y, cv=10) # Calculate scores score = r2_score(y, y_cv) rmsecv = np.sqrt(mean_squared_error(y, y_cv)) if return_model == False: return(y_cv, score, rmsecv) else: return(y_cv, score, rmsecv, pls_simple) def pls_optimise_components(X, y, npc): rmsecv = np.zeros(npc) for i in range(1,npc+1,1): # Simple PLS pls_simple = PLSRegression(n_components=i) # Fit pls_simple.fit(X, y) # Cross-validation y_cv = cross_val_predict(pls_simple, X, y, cv=10) # Calculate scores score = r2_score(y, y_cv) rmsecv[i-1] = np.sqrt(mean_squared_error(y, y_cv)) # Find the minimum of ther RMSE and its location opt_comp, rmsecv_min = np.argmin(rmsecv), rmsecv[np.argmin(rmsecv)] return (opt_comp+1, rmsecv_min) def msc(input_data, reference=None): ''' Perform Multiplicative scatter correction''' # mean centre correction for i in range(input_data.shape[0]): input_data[i,:] -= input_data[i,:].mean() # Get the reference spectrum. If not given, estimate it from the mean if reference is None: # Calculate mean ref = np.mean(input_data, axis=0) else: ref = reference # Define a new array and populate it with the corrected data data_msc = np.zeros_like(input_data) for i in range(input_data.shape[0]): # Run regression fit = np.polyfit(ref, input_data[i,:], 1, full=True) # Apply correction data_msc[i,:] = (input_data[i,:] - fit[0][1]) / fit[0][0] return (data_msc, ref) def band_selection_sa(X, y, n_of_bands, max_lv, n_iter): p = np.arange(X.shape[1]) np.random.shuffle(p) bands = p[:n_of_bands] # Selected Bands. Start off with a random selection nbands = p[n_of_bands:] # Excluded bands Xop = X[:, bands] # This is the array to be optimised # Run a PLS optimising the number of latent variables opt_comp, rmsecv_min = pls_optimise_components(Xop, y, max_lv) rms = [] # Here we store the RMSE value as the optimisation progresses for i in range(n_iter): cool = 0.005 * rmsecv_min # cooling parameter. It decreases with the RMSE new_bands = np.copy(bands) new_nbands = np.copy(nbands) r1, r2 = np.random.randint(0, n_of_bands, 2), np.random.randint(n_of_bands, X.shape[1] - n_of_bands, 2) #随机交换两个位置 el1, el2 = new_bands[r1], new_nbands[r2] new_bands[r1] = el2 new_nbands[r2] = el1 Xop = X[:, new_bands] opt_comp_new, rmsecv_min_new = pls_optimise_components(Xop, y, max_lv) # If the new RMSE is less than the previous, accept the change if (rmsecv_min_new < rmsecv_min): bands = new_bands nbands = new_nbands opt_comp = opt_comp_new rmsecv_min = rmsecv_min_new rms.append(rmsecv_min_new) stdout.write("\r" + str(i)) stdout.write(" ") stdout.write(str(opt_comp_new)) stdout.write(" ") stdout.write(str(rmsecv_min_new)) stdout.flush() # If the new RMSE is larger than the previous, accept it with some probability # dictated by the cooling parameter if (rmsecv_min_new > rmsecv_min): prob = np.exp(-(rmsecv_min_new - rmsecv_min) / cool) # probability if (np.random.random() < prob): bands = new_bands nbands = new_nbands opt_comp = opt_comp_new rmsecv_min = rmsecv_min_new rms.append(rmsecv_min_new) stdout.write("\r" + str(i)) stdout.write(" ") stdout.write(str(opt_comp_new)) stdout.write(" ") stdout.write(str(rmsecv_min_new)) stdout.flush() else: rms.append(rmsecv_min) stdout.write("\n") print(np.sort(bands)) print('end') return (bands, opt_comp, rms) Xc, ref = msc(X) Xc = savgol_filter(Xc, 11, 3, deriv=1) bands, opt_comp, rms = band_selection_sa(Xc, y, n_of_bands=40, max_lv=40, n_iter=10000) predicted, r2cv, rmscv = base_pls(Xc[:, bands], y, opt_comp) print("PLS optimised bands:") print(" R2: %5.3f, RMSE: %5.3f" % (r2cv, rmscv)) print(" Number of Latent Variables: "+str(opt_comp)) plt.figure(figsize=(8, 6)) plt.semilogx(rms) plt.xlabel("Iteration ") plt.ylabel("RMSECV") plt.show()