应用偏最小二乘回归（PLSR）对NIR光谱与样本中RON含量进行定量分析

最新推荐文章于 2025-02-24 07:45:00 发布

li_huifei

最新推荐文章于 2025-02-24 07:45:00 发布

阅读量8.1k

点赞数 9

分类专栏：学习笔记

本文链接：https://blog.csdn.net/li_huifei/article/details/78467689

版权

学习笔记专栏收录该内容

18 篇文章

订阅专栏

本文对偏最小二乘回归（Partial Least Squares Regression，缩写为PLS或PLSR）的算法原理进行了简单的推导，并以汽油近红外（NIR）光谱为例，给出了该算法的一个具体应用实例。原本写的是Word版本的报告，实在懒得在这边再写一遍，所以偷懒直接把图片放了过来，最后贴上了源程序。

算法原理部分参考了一篇没有注明作者的论文。

import csv
from sklearn import preprocessing
from sklearn.cross_validation import train_test_split
from sklearn.decomposition import RandomizedPCA
from sklearn.cross_decomposition import PLSRegression 
import numpy as np
import math
import matplotlib.pyplot as plt

def _load_csv(path):
    """Read a comma-separated numeric text file into a NumPy array."""
    return np.loadtxt(path, delimiter=',')


# Load the data sets and report the shape of each one as it is read.
# A is the predictor matrix handed to train_test_split further down
# (presumably the NIR spectra -- confirm against the data files).
A = _load_csv('A.csv')
print(A.shape)
Olefin = _load_csv('Olefin.csv')
print(Olefin.shape)
RON = _load_csv('RON.csv')
print(RON.shape)
# Stack RON and Olefin as rows, then transpose so each becomes a column.
OR = np.transpose(np.array([RON, Olefin]))
print(OR.shape)
# Not used in the visible part of the script (presumably the wavelength axis).
nm = _load_csv('nm.csv')

def error(y_predict, y_test):
    """Return the sum of squared differences between two sequences.

    The sequences are compared element by element, position by position:
    one squared difference is formed for every entry of ``y_predict`` and
    the matching entry of ``y_test``, and the results are added up starting
    from 0.

    Args:
        y_predict: Indexable sequence of predicted values; its length sets
            how many positions are compared.
        y_test: Indexable sequence of reference values; must be at least as
            long as ``y_predict``.

    Returns:
        The accumulated sum of squared differences (0 when ``y_predict`` is
        empty).

    Raises:
        IndexError: If ``y_test`` is shorter than ``y_predict``.
    """
    return sum(
        (predicted - y_test[position]) ** 2
        for position, predicted in enumerate(y_predict)
    )

# Split the data into a training set and a test set, then standardise the
# training predictors column by column.
x_train, x_test, y_train, y_test = train_test_split(
    A, RON, test_size=0.3)
x_train_st = preprocessing.scale(x_train)

# Every leave-one-out model below is fitted on one row fewer than the training
# set, so the candidate model (n_components + 1 components) is capped by that
# row count and by the number of predictor columns.
max_components = min(x_train_st.shape[0] - 1, x_train_st.shape[1])
n_components = 0

while n_components + 1 < max_components:
    n_components += 1

    # SS: squared fitting error of a model with n_components components that
    # was trained on the complete training set.
    pls_fit = PLSRegression(n_components=n_components)
    pls_fit.fit(x_train_st, y_train)
    y_predict0 = np.ravel(pls_fit.predict(x_train_st))
    SS = error(y_predict0, y_train)

    # PRESS: leave-one-out squared prediction error of a model with one more
    # component.  Each sample is predicted by a model that never saw it.
    n_components1 = n_components + 1
    y_predict1 = []
    for i in range(x_train_st.shape[0]):
        x_train_st1 = np.delete(x_train_st, i, 0)
        y_train_st1 = np.delete(y_train, i, 0)
        # A separate name keeps the leave-one-out models from being mistaken
        # for the final model stored in pls2.
        pls_loo = PLSRegression(n_components=n_components1)
        pls_loo.fit(x_train_st1, y_train_st1)
        # predict() expects a 2-D array, so present the sample as one row.
        held_out = x_train_st[i].reshape(1, -1)
        y_predict1.append(np.ravel(pls_loo.predict(held_out))[0])
    PRESS = error(y_predict1, y_train)

    # Cross-validity of the additional component.
    Qh = 1 - float(PRESS / SS)
    # NOTE(review): the customary cross-validity limit is 1 - 0.95**2 = 0.0975;
    # confirm whether 0.0985 is intentional.
    if Qh < 0.0985:
        plt.figure(1)
        plt.scatter(y_predict0, y_train)
        plt.figure(2)
        plt.scatter(y_predict1, y_train)
        print('%s %s' % ('the Qh is ', Qh))
        print('%s %s' % ('the PRESS is', PRESS))
        print('%s %s' % ('the SS is', SS))
        break

# Final model used by the evaluation code that follows: n_components + 1
# components fitted on the whole standardised training set.
# NOTE(review): the usual reading of the criterion would retain only
# n_components components once Qh falls below the limit; the original choice
# of n_components + 1 is kept here -- confirm.
pls2 = PLSRegression(n_components=n_components + 1)
pls2.fit(x_train_st, y_train)

print('%s %s' % ('n_components is ', n_components + 1))
# pls2 was fitted on the standardised training set, so evaluation data must be
# transformed with that training set's column means and standard deviations.
# x_train still holds that training set at this point; the scaler is fitted
# before the loop below re-splits the data and overwrites x_train.
train_scaler = preprocessing.StandardScaler().fit(x_train)

SECs = []
errors = []
e = float('inf')  # smallest SEC seen so far
# Repeated testing on 10 random half-splits of the full data set.
# NOTE(review): these splits are drawn from all of A, so they overlap with the
# samples pls2 was trained on -- confirm this is the intended evaluation.
for _ in range(10):
    x_train, x_test, y_train, y_test = train_test_split(
        A, RON, test_size=0.5)
    x_test_st = train_scaler.transform(x_test)
    y_predict = pls2.predict(x_test_st)
    squared_error = float(error(y_predict, y_test))
    SECs.append(np.sqrt(squared_error / (y_test.shape[0] - 1)))
    errors.append(squared_error)
    if SECs[-1] < e:
        # Remember the split with the lowest SEC for the final scatter plot.
        e = SECs[-1]
        y_predict_min = y_predict
        y_test_min = y_test

# Print the results (the values shown belong to the last split) and plot the
# split with the lowest SEC.
print('%s %s' % ('the prediced value is ', y_predict.T))
print('%s %s' % ('the true value is', y_test))
print('%s %s' % ('the mean error is', float(np.mean(errors))))
print('%s %s' % ("the mean SEC is ", float(np.mean(SECs))))

# NOTE(review): plt.show() is never called in the visible script; presumably
# it is run in an interactive session -- confirm.
plt.figure(3)
plt.scatter(y_predict_min, y_test_min)