本文对偏最小二乘法(PLS——Partial Least Squares Regression,也缩写为PLSR)的算法原理进行了简单的推导,并结合汽油近红外光谱举例了算法具体应用的实例。原本写了word版本的报告,实在是懒得再在这边写一遍,所以偷懒直接放图片过来了,最后贴上了原程序。
算法原理部分参考了没有注明作者的一篇论文..
import csv from sklearn import preprocessing from sklearn.cross_validation import train_test_split from sklearn.decomposition import RandomizedPCA from sklearn.cross_decomposition import PLSRegression import numpy as np import math import matplotlib.pyplot as plt A = np.loadtxt('A.csv',delimiter=',')#读入数据 print A.shape Olefin= np.loadtxt('Olefin.csv',delimiter=',') print Olefin.shape RON = np.loadtxt('RON.csv',delimiter=',') print RON.shape OR = np.array((RON,Olefin)).T print OR.shape nm = np.loadtxt('nm.csv',delimiter=',') def error(y_predict,y_test):#定义计算误差平方和函数 errs = [] for i in range(len(y_predict)): e = (y_predict[i]-y_test[i])**2 errs.append(e) return sum(errs) x_train, x_test, y_train, y_test = train_test_split( A, RON, test_size=0.3)#划分训练集测试集 x_train_st = preprocessing.scale(x_train)#数据标准化 n_components = 0 while n_components<x_train_st.shape[1]: n_components+=1 pls2 = PLSRegression(n_components=n_components)#计算SS pls2.fit(x_train_st, y_train) y_predict0 = pls2.predict(x_train_st) SS = error(y_predict0,y_train) y_predict1 = [] for i in range(x_train_st.shape[0]):#计算PRESS n_components1 = n_components+1 x_train_st1 = np.delete(x_train_st,i,0) y_train_st1 = np.delete(y_train_st,i,0) pls2 = PLSRegression(n_components=n_components1) pls2.fit(x_train_st, y_train) y_predict11 = pls2.predict(x_train_st[i]) y_predict1.append(y_predict11) PRESS = error(y_predict1,y_train) Qh = 1-float(PRESS/SS) if Qh<0.0985:#判断精度 plt.figure(1) plt.scatter(y_predict0,y_train) plt.figure(2) plt.scatter(y_predict1,y_train) print 'the Qh is ',Qh print 'the PRESS is',PRESS print 'the SS is',SS break print 'n_components is ',n_components+1 SECs = [] errors = [] e = 100 for i in range(10):#循环测试 #print i x_train, x_test, y_train, y_test = train_test_split( A, RON, test_size=0.5) x_test_st = preprocessing.scale(x_test) y_predict = pls2.predict(x_test_st) SECs.append(np.sqrt(error(y_predict,y_test)/(y_test.shape[0]-1))) errors.append(float(error(y_predict,y_test))) if SECs[-1]<e: y_predict_min = y_predict y_test_min = y_test print 'the prediced value is ' ,y_predict.T#画图,打印结果 print 'the true value is',y_test print 'the mean error is',float(np.mean(errors)) print "the mean SEC is ",float(np.mean(SECs)) plt.figure(3) plt.scatter(y_predict_min,y_test_min)
应用偏最小二乘回归(PLSR)对NIR光谱与样本中RON含量进行定量分析
最新推荐文章于 2025-02-24 07:45:00 发布