最近接触了一下聚类算法,其中最简单、最经典的莫过于 k-means 算法。了解了算法原理之后,我着手编写了 MATLAB 代码。我的代码只有二十几行,比网上的很多实现都要简洁。
function [cls,ctr]=kmeans(data,K,tol)
% KMEANS  Lloyd's k-means clustering using Euclidean distance.
%
%   [cls,ctr] = kmeans(data,K)
%   [cls,ctr] = kmeans(data,K,tol)
%
% Input:
%   data - N-by-D matrix, one observation per row.
%   K    - number of clusters; an integer with 1 <= K <= N.
%   tol  - (optional, default 0) the iteration stops once the summed
%          Euclidean displacement of all centroids in one update is <= tol.
%
% Output:
%   cls  - 1-by-K cell array; cls{k} contains the rows of data assigned to
%          cluster k, in their original order (0-by-D if the cluster is empty).
%   ctr  - K-by-D matrix; row k is the centroid of cluster k.
%
% Notes:
%   * The first K rows of data are used as the initial centroids, so the
%     result is deterministic for a given row order.
%   * Ties in distance are resolved in favour of the lowest cluster index.
%   * A cluster that receives no points keeps its previous centroid.
%   * This function has the same name as the Statistics Toolbox kmeans and
%     may shadow it depending on the search path.
%
% Example:
%   data=rand(100,2);
%   K=4;tol=0.01;
%   [cls,ctr]=kmeans(data,K,tol);
%   clr=['r','k','g','b'];
%   for k=1:K
%       plot(cls{k}(:,1),cls{k}(:,2),[clr(k),'o']);hold on;
%       plot(ctr(k,1),ctr(k,2),[clr(k),'+']);hold on;
%   end

if nargin<3 || isempty(tol)
    tol=0;
end

N=size(data,1);
if ~isscalar(K) || K<1 || K~=fix(K) || K>N
    error('kmeans:invalidK', ...
        'K must be an integer between 1 and the number of rows of data (%d).',N);
end

ctr=data(1:K,:);      % initial centroids: the first K observations
dist=zeros(N,K);      % dist(i,k): distance from observation i to centroid k

while true
    % Assignment step: attach every observation to its nearest centroid.
    for k=1:K
        delta=bsxfun(@minus,data,ctr(k,:));
        dist(:,k)=sqrt(sum(delta.^2,2));
    end
    [~,idx]=min(dist,[],2);   % min returns the first index on ties

    % Update step: recompute centroids and measure how far they moved.
    cls=cell(1,K);
    newctr=ctr;               % empty clusters keep their old centroid
    S=0;
    for k=1:K
        cls{k}=data(idx==k,:);
        if ~isempty(cls{k})
            % Explicit dimension 1: without it, a single-row cluster would
            % be averaged across its coordinates instead of across rows.
            newctr(k,:)=mean(cls{k},1);
        end
        S=S+norm(newctr(k,:)-ctr(k,:));
    end
    ctr=newctr;

    if S<=tol
        break;
    end
end