光谱特征选择之 siPLS(协同区间偏最小二乘)算法

光谱特征选择之 siPLS(协同区间偏最小二乘)算法

import numpy as np
import matplotlib.pyplot as plt
import math
from pandas import read_csv
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import mean_squared_error

# Load the dataset. The file is read without a header row, so pandas labels
# the columns 0, 1, 2, ...
# The path is a raw string: "\p" is not a valid escape sequence, and a plain
# literal only works by accident (and triggers a warning on recent Pythons).
data = read_csv(r'E:\python data\peach_spectra_brix.csv',header = None)
# Every column from label 1 onward becomes the predictor matrix.
x = np.array(data.loc[:,1:])
# Column 0 is the response (presumably the Brix value, judging by the file
# name -- confirm against the data file).
y = data.loc[:,0]

def splitspectrum(interval_num,x):
    """Split the columns of ``x`` into ``interval_num`` contiguous blocks.

    When the column count is not evenly divisible, the leading blocks get
    ``feature_num // interval_num`` columns and the trailing
    ``feature_num % interval_num`` blocks get one column more, so every
    column ends up in exactly one block.

    Args:
        interval_num: Number of blocks to produce.
        x: 2-D array-like exposing ``.shape`` and supporting ``x[:, a:b]``.

    Returns:
        dict mapping ``'1'`` .. ``str(interval_num)`` to the corresponding
        column slice of ``x``, in left-to-right order.
    """
    feature_num = x.shape[1]
    base_width, leftover = divmod(feature_num, interval_num)
    # Blocks up to this 1-based index keep the base width; later ones are
    # one column wider so that the leftover columns are absorbed at the end.
    last_narrow = interval_num - leftover

    x_block = {}
    col_start = 0
    for idx in range(1, interval_num + 1):
        width = base_width if idx <= last_narrow else base_width + 1
        col_stop = col_start + width
        x_block[str(idx)] = x[:, col_start:col_stop]
        col_start = col_stop
    return x_block

def sipls(intervals,x,y):
    """Evaluate every pair of spectral intervals with PLS and plot the RMSECV.

    ``x`` is split into ``intervals`` column blocks with ``splitspectrum``.
    For each unordered pair of blocks (i < j) the two blocks are joined
    column-wise, PLS models with an increasing number of components are
    scored by 10-fold cross-validated RMSE, and the lowest RMSE for that
    pair is recorded. The recorded values are printed and drawn as a bar
    chart, one bar per pair in the order the pairs were visited.

    Args:
        intervals: Number of blocks to split the columns of ``x`` into.
        x: 2-D array of predictors (rows are samples).
        y: Response values, one per row of ``x``.

    Returns:
        None. Results are printed and shown in a matplotlib figure.
    """
    x_block = splitspectrum(intervals, x)
    rmsecv = []
    for i in range(1, intervals + 1):
        for j in range(i + 1, intervals + 1):
            print('当前区间为:%d,%d'%(i,j))
            x_interval = np.append(x_block[str(i)], x_block[str(j)], axis=1)
            # PLSRegression rejects n_components larger than the number of
            # features, so narrow interval pairs must try fewer than the
            # default 29 component counts.
            max_components = min(29, x_interval.shape[1])
            error = []
            for component in range(1, max_components + 1):
                pls = PLSRegression(n_components=component)
                # No separate fit() beforehand: cross_val_predict clones and
                # fits the estimator on each training fold itself.
                y_cv = cross_val_predict(pls, x_interval, y, cv=10)
                error.append(math.sqrt(mean_squared_error(y, y_cv)))
            # Keep the best (lowest) cross-validated error for this pair.
            rmsecv.append(np.min(error))
    print(rmsecv)
    plt.figure(figsize=(6,4), dpi=300)
    # One bar per evaluated pair; derive the count from the results instead
    # of hard-coding 45, which is only right when intervals == 10.
    plt.bar(np.arange(1, len(rmsecv) + 1), rmsecv, width=0.5, linewidth=0.4)
    plt.xlabel("intervals")
    plt.ylabel("rmsecv")
    plt.show()

# Adjust the interval count to suit your own needs.
sipls(intervals=10, x=x, y=y)

  • 4
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值