PCA及kmeans介绍见吴恩达机器学习–PCA、吴恩达机器学习–kmeans.
代码:
%% Step1: load data and standardize (zero mean, unit variance per feature)
% NOTE(review): the original divided by var(); proper standardization
% divides by the standard deviation, otherwise each feature is scaled
% by 1/sigma^2 instead of 1/sigma.
X = load(mat_path, 'mean_data');
X = (X.mean_data - mean(X.mean_data, 1)) ./ std(X.mean_data, 0, 1);

%% Step2: compute covariance matrix
% sigma does not depend on k, so compute it once outside the loop.
number_of_samples = size(X, 1);
sigma = (1/number_of_samples) * (X' * X);

%% Step3: compute eigenvectors of matrix sigma
% For a symmetric PSD matrix the SVD gives the eigen-decomposition;
% columns of U are the principal directions, diag(S) the variances.
[U, S, ~] = svd(sigma);
S_n = sum(diag(S));                 % total variance

%% Step4: find the smallest k retaining >= 99% of the variance
for k = 1:size(X, 2)
    U_reduce = U(:, 1:k);           % first k principal components
    Z = X * U_reduce;               % projected data, n-by-k
    S_k = sum(diag(S(1:k, 1:k)));   % variance captured by first k components
    fprintf("%.2f%% variance retained\n", 100 * S_k / S_n);
    % Keep the 2-D / 3-D projections for later visualization.
    % (The original only stored these when the threshold was NOT met,
    % leaving them undefined if 99% was reached at k <= 3.)
    if k == 2
        Z_to_plot = Z;
    elseif k == 3
        Z_to_plot3 = Z;
    end
    if S_k / S_n >= 0.99
        break;
    end
end
% save('./mean_data_reduced.mat','Z');

%% Step5: cluster the reduced data and visualize
% NOTE(review): the original asked kmeans for 2 clusters but plotted a
% third group (cidx==3) that was always empty; the cluster count and the
% plotting now agree through a single num_clusters parameter.
num_clusters = 2;
opts = statset('Display', 'final');
% cidx: cluster index per sample; ctrs: num_clusters-by-3 centroids
% (clustering is done in the 3-D reduced space).
[cidx, ctrs] = kmeans(Z_to_plot3, num_clusters, 'Distance', 'sqeuclidean', ...
    'Replicates', 5, 'Options', opts);

markers = {'r.', 'b.', 'g.'};   % one marker style per cluster

% 2-D scatter: first two principal components, colored by cluster.
% Rows of Z_to_plot and Z_to_plot3 refer to the same samples, so the
% 3-D cluster labels index the 2-D projection directly.
figure(1)
hold on
for c = 1:num_clusters
    plot(Z_to_plot(cidx == c, 1), Z_to_plot(cidx == c, 2), markers{c});
end
plot(ctrs(:, 1), ctrs(:, 2), 'm*');   % centroids projected onto first 2 dims
hold off

% 3-D scatter: first three principal components, colored by cluster.
figure(2)
hold on
for c = 1:num_clusters
    plot3(Z_to_plot3(cidx == c, 1), Z_to_plot3(cidx == c, 2), ...
          Z_to_plot3(cidx == c, 3), markers{c});
end
plot3(ctrs(:, 1), ctrs(:, 2), ctrs(:, 3), 'm*');
hold off
可视化: