非线性普遍存在于工业系统中,这里用核函数来处理原始数据,使得原始数据近似为线性数据结构。再通过偏最小二乘法进行质量指标的预测,T2/SPE统计量来检测系统故障。
数据
链接:https://pan.baidu.com/s/1yHgZRqy6yyk9wsm6igqUSw
提取码:odsz
核函数
本代码使用高斯核函数:
function K = gaussian_kernel(X, Y, sigma)
% X: 第一个数据集 (行表示样本,列表示特征)
% Y: 第二个数据集
% sigma: 高斯核的宽度参数
% 获取数据集的尺寸
nX = size(X, 1); % X 中的样本数
nY = size(Y, 1); % Y 中的样本数
% 计算 X 和 Y 之间的欧几里得距离的平方
% 这里使用矩阵运算来加速计算
XX = sum(X.^2, 2); % X 每个样本的平方和
YY = sum(Y.^2, 2); % Y 每个样本的平方和
% 使用矩阵操作计算欧几里得距离的平方
D = XX + YY' - 2 * (X * Y'); % 欧几里得距离的平方矩阵
% 计算高斯核矩阵
K = exp(-D / (2 * sigma^2));
end
T2控制限
function T2_limit = calculateT2Limit(n, p, alpha)
% 计算 T^2 控制限
% 输入:
% n - 样本数
% p - 主元数
% alpha - 置信水平,通常是 0.05
%
% 输出:
% T2_limit - 计算得到的 T^2 控制限
% 计算 F 分布的临界值
F_critical = finv(1-alpha, p, n-p);
% 计算 T^2 控制限
T2_limit = (p * (n * n-1) / (n*(n-p))) * F_critical;
end
主程序
clear all;
close all;
clc;
%% 参数选取
%核函数参数
per =225;
%置信度
alpha = 0.01;
%% 数据导入及处理
X_Y_train = matfile('X_Y_train.mat');
X_train = X_Y_train.X_train;
Y_train = X_Y_train.Y_train;
Y_train = Y_train(:,1);
Y_train_s = Y_train;
X_Y_test = matfile('X_Y_test_IDV01.mat');
X_test = X_Y_test.X_test;
Y_test = X_Y_test.Y_test;
Y_test = Y_test(:,1);
%样本数、过程变量数、Z-score标准化
[~, m] = size(X_train); [n, p] = size(Y_train);
[X_train, Xmean, Xstd] = zscore(X_train); [Y_train, Ymean, Ystd] = zscore(Y_train);
[N, ~] = size(X_test);
X_test = (X_test - repmat(Xmean, N, 1))./repmat(Xstd, N, 1); Y_test = (Y_test - repmat(Ymean, N, 1))./repmat(Ystd, N, 1);
%数据预处理
eye_1 = eye(n);
ones_1 = ones(n,1);
%核函数处理
K_X_train = gaussian_kernel(X_train,X_train,per);
K_X_test = gaussian_kernel(X_train,X_test,per);
%去均值处理
K_X_train_N_mean = (eye_1 - ((ones_1 * ones_1') / n)) * K_X_train * (eye_1 - ((ones_1 * ones_1') / n));
K_X_train_N_mean_s = K_X_train_N_mean;
%% KPLS
num_c = 2;
T = [];U = [];
u = Y_train(:,1);
for i=1:num_c
t_old = randn(n,1);
for j = 1:100
t = K_X_train_N_mean_s *u;
t = t/norm(t);
q = Y_train_s' * t;
u = Y_train_s * q;
u = u/norm(u);
if norm(t-t_old)<1e-6
break;
end
t_old = t;
end
T=[T t];
U=[U u];
Y_train_s =(eye_1-t *t')* Y_train_s;
K_X_train_N_mean_s = (eye_1-t *t')*K_X_train_N_mean_s*(eye_1-t *t');
end
%% 预测
K_X_test_N_mean_T = (K_X_test' - (ones_1'/n)*K_X_train)*(eye_1-ones_1*ones_1'/n);
%预测矩阵
B = U * inv(T' * K_X_train_N_mean * U) * T' * Y_train;
%预测质量指标
Y_test_p = K_X_test_N_mean_T*B;
plot(Y_test_p+mean(Y_train),'b-')
hold on
plot(Y_test,'r-')
legend('预测值','真实值');
xlabel('样本编号');
ylabel('响应值');
title('KPLS回归预测');
%% 在线监测
%% T2
T_beta = U * inv(T' * K_X_train_N_mean * U);
T_new_T = K_X_test_N_mean_T * T_beta;
T2 = [];
T2 = diag((T_new_T*inv(cov(T)))*T_new_T');
%T2控制限
T2_limit = calculateT2Limit(n,num_c,alpha);
figure;
subplot(2,1,1);
plot(T2);
hold on;
line([1 N], [T2_limit T2_limit], 'Color', 'red');
title('测试集 Hotelling''s T² 控制图');
xlabel('样本编号');
ylabel('T² 值');
%% SPE
SPE = [];
for i = 1:N
K_X_test_1 = gaussian_kernel(X_train,X_test(i,:),per);
K_X_test_N_mean_T_1 = (K_X_test_1' - (ones_1'/n)*K_X_train)*(eye_1-ones_1*ones_1'/n);
T_new_T_1 = K_X_test_N_mean_T_1 * T_beta;
SPE_c = 1+ (sum(K_X_train(:))/(n*n)) -(2/n) * sum(K_X_test_1)-2 * K_X_test_N_mean_T_1 * T * T_new_T_1'+T_new_T_1 * T' * K_X_train_N_mean * T * T_new_T_1';
SPE = [SPE,SPE_c];
end
%SPE控制限
SPE_limit_s = [];
for i = 1:n
K_X_test_1 = gaussian_kernel(X_train,X_train(i,:),per);
K_X_test_N_mean_T_1 = (K_X_test_1' - (ones_1'/n)*K_X_train)*(eye_1-ones_1*ones_1'/n);
T_new_T_1 = K_X_test_N_mean_T_1 * T_beta;
SPE_c = 1 + (sum(K_X_train(:))/(n*n)) ...
- (2/n) * sum(K_X_test_1)-2 * K_X_test_N_mean_T_1 * T * T_new_T_1'...
+T_new_T_1 * T' * K_X_train_N_mean * T * T_new_T_1';
SPE_limit_s = [SPE_limit_s,SPE_c];
end
mean_SPE_limit = mean(SPE_limit_s);%均值
var_SPE_limit = var(SPE_limit_s);%方差
g =var_SPE_limit/(2*mean_SPE_limit);
h = (2 * mean_SPE_limit * mean_SPE_limit)/var_SPE_limit;
SPE_limit = g * chi2inv(1-alpha,h);
subplot(2,1,2);
plot(SPE);
hold on;
line([1 N], [SPE_limit SPE_limit], 'Color', 'red');
title('测试集 SPE 控制图');
xlabel('样本编号');
ylabel('SPE 值');
仿真结果
总结
通过上述程序,实现了对TE过程中,目标变量的预测,并通过T2/SPE统计量,成功检测系统故障发生时间。本文直接定义主元个数,当然也可以通过交叉-检验法确定主元个数,本文未能呈现。但是由于作者能力有限,可能代码中存在诸多问题,望大家能够指出,感激不尽。
参考文献
[1]王光.基于关键性能指标的数据驱动故障检测方法研究[D].哈尔滨工业大学,2016.