% 假设我们有一个名为data的矩阵,其中包含了我们的测试数据
% 这里我们使用MATLAB内置的iris数据集作为示例
load fisheriris
data = meas; % meas是150x4的矩阵,包含了150个样本和4个特征
% 计算主成分
[coeff, score, latent, tsquared, explained] = pca(data);
% 计算KMO检验的值
kmo_value = calculateKMO(data);
% 输出KMO值
fprintf('KMO Test Value: %f\n', kmo_value);
% 结果可视化
% 可视化前N个主成分(根据后面的帕累托图选择主成分数)
PCANum = 2;
figure;
biplot(coeff(:,1:PCANum), 'Scores', score(:,1:PCANum), 'VarLabels', {'Var1','Var2','Var3','Var4'});
xlabel("Component 1")
ylabel("Component 2")
zlabel("Component 3")
title('PCA Biplot: First two Principal Components');
% 可视化累计解释的方差百分比
figure;
pareto(explained)
xlabel('Principal Component')
ylabel('Variance Explained (%)')
title('Scree Plot')
% 可视化前两个主成分的得分,并添加置信椭圆
figure;
scatter(score(:,1), score(:,2)); % 绘制得分散点图
xlabel('Principal Component 1');
ylabel('Principal Component 2');
title('PCA Scores with Confidence Ellipse');
% 计算并绘制置信椭圆
hold on; % 保持当前图形,以便在同一图上绘制椭圆
confidenceEllipse(score(:,1), score(:,2));
hold off;
% 置信椭圆绘制函数
function confidenceEllipse(x, y)
% 计算均值
mu = mean([x y]);
% 计算协方差矩阵
sigma = cov(x, y);
% 置信椭圆的参数
s = chi2inv(0.95, 2); % 95%置信区间的卡方值
% 创建椭圆
[eigenvec, eigenval] = eig(sigma);
[largest_eigenvec_ind_c, ~] = find(eigenval == max(max(eigenval)));
largest_eigenvec = eigenvec(:, largest_eigenvec_ind_c);
% 确定椭圆的角度和大小
angle = atan2(largest_eigenvec(2), largest_eigenvec(1));
if angle < 0
angle = angle + 2*pi;
end
chisquare_val = sqrt(s);
theta_grid = linspace(0,2*pi);
phi = angle;
X0=mu(1);
Y0=mu(2);
a=chisquare_val*sqrt(eigenval(largest_eigenvec_ind_c,largest_eigenvec_ind_c));
b=chisquare_val*sqrt(eigenval(3-largest_eigenvec_ind_c,3-largest_eigenvec_ind_c));
% 椭圆的参数方程
ellipse_x_r = a*cos( theta_grid );
ellipse_y_r = b*sin( theta_grid );
% 旋转椭圆并绘制
R = [ cos(phi) sin(phi); -sin(phi) cos(phi) ];
r_ellipse = [ellipse_x_r;ellipse_y_r]' * R;
plot(r_ellipse(:,1) + X0, r_ellipse(:,2) + Y0, 'r-');
end
% 计算KMO值的函数
function kmo = calculateKMO(X)
% 计算相关系数矩阵
R = corrcoef(X);
% 计算偏相关系数矩阵
invR = inv(R);
partialCorrelation = -invR ./ sqrt(diag(invR)*diag(invR)');
% 计算KMO统计量
numerator = sum(R(:).^2) - sum(diag(R).^2);
denominator = numerator + sum(partialCorrelation(:).^2) - sum(diag(partialCorrelation).^2);
kmo = numerator / denominator;
end
结果如下图所示: