实验一:聚类
实验代码:
testKMeans.m ——测试主函数
function testKMeans()
%TESTKMEANS Demo driver: cluster a synthetic 2-D data set and plot the result.
%   Generates the data with GenerateGaussianMixtureDataset, splits it into
%   two clusters with K_Means, and draws the labelled points together with
%   the cluster centroids in figure 3.

samples = GenerateGaussianMixtureDataset();
numClusters = 2;
[labels, centers] = K_Means(samples, numClusters);

% Figure 3: points coloured by assigned cluster (red = 1, blue = 2).
figure(3)
inFirst = (labels == 1);
plot(samples(inFirst,1), samples(inFirst,2), 'ro', 'MarkerFaceColor','r');
hold on;   % keep the first scatter while the remaining layers are added
inSecond = (labels == 2);
plot(samples(inSecond,1), samples(inSecond,2), 'bo','MarkerFaceColor','b');

% Overlay the centroids as large green crosses.
plot(centers(:,1), centers(:,2), 'gx','LineWidth',4,'MarkerSize',15);
title('K-Means Result');
end
GenerateGaussianMixtureDataset.m——产生混合高斯数据
function [ X ] = GenerateGaussianMixtureDataset( )
%GENERATEGAUSSIANMIXTUREDATASET Draw a two-component 2-D Gaussian mixture sample.
%   X = GenerateGaussianMixtureDataset() returns a 200-by-2 matrix. The
%   first 100 rows are drawn with mvnrnd from a Gaussian with mean [2 3],
%   the last 100 rows from a Gaussian with mean [7 8]; both components use
%   the covariance [1 0; 0 2].
%
%   Side effects: figure 1 shows the two components in different colours
%   ("Real Data"); figure 2 shows the stacked, unlabelled data set.

pointsPerComponent = 100;
sharedCov = [1 0; 0 2];   % both components use the same covariance

% Figure 1: the data with its true component membership.
figure(1)
firstCloud = mvnrnd([2 3], sharedCov, pointsPerComponent);
plot(firstCloud(:,1), firstCloud(:,2), 'rx', 'MarkerFaceColor','r','LineWidth',2,'MarkerSize',10);
hold on;
secondCloud = mvnrnd([7 8], sharedCov, pointsPerComponent);
plot(secondCloud(:,1), secondCloud(:,2), 'bx', 'MarkerFaceColor','b','LineWidth',2,'MarkerSize',10);
title('Real Data');

% Stack both components; the row order is the only trace of the labels.
X = [firstCloud; secondCloud];

% Figure 2: the same points drawn without label information.
figure(2)
plot(X(:,1), X(:,2), 'mo', 'MarkerFaceColor','m');
title('No label data : X');
end
K_Means.m—— K-Means函数
function [gamma, centroids] = K_Means( X, K )
%K_MEANS Partition the rows of X into K clusters with the K-Means algorithm.
%
%   [gamma, centroids] = K_Means(X, K)
%
%   Inputs:
%     X - N-by-D matrix, one data point per row. All entries must be finite.
%     K - number of clusters; an integer with 1 <= K <= N.
%
%   Outputs:
%     gamma     - N-by-1 vector; gamma(i) is the index (1..K) of the cluster
%                 that X(i,:) was assigned to in the last assignment step.
%     centroids - K-by-D matrix; row k is the mean of the points assigned to
%                 cluster k, or a re-seeded data point if cluster k received
%                 no points in the last assignment step.
%
%   The initial centroids are K rows of X selected through randperm, so the
%   result depends on the state of the random number generator.
%   Iteration stops when the total squared distance of the points to their
%   assigned centroids changes by less than 1e-10 between consecutive passes.
%
%   Errors:
%     K_Means:invalidK       - K is not an integer in 1..N.
%     K_Means:nonFiniteData  - X contains NaN or Inf.

N = size(X, 1);

% An out-of-range K would otherwise fail with an opaque indexing error.
if ~isscalar(K) || K < 1 || K ~= floor(K) || K > N
    error('K_Means:invalidK', ...
          'K must be an integer between 1 and the number of rows of X (%d).', N);
end

% NaN/Inf make the cost NaN, so the convergence test below could never
% succeed and the loop would spin forever.
if any(~isfinite(X(:)))
    error('K_Means:nonFiniteData', 'X must contain only finite values.');
end

% Initialise the centroids with K distinct rows of X chosen at random.
rndp = randperm(N);
centroids = X(rndp(1:K),:);

sqr_dist = zeros(N,K);   % sqr_dist(i,k): squared distance from X(i,:) to centroid k
precost = 0;             % cost of the previous pass, used by the stop test

while true
    % E step: distance of every point to every centroid, one centroid at a
    % time, then assign each point to its nearest centroid. Ties go to the
    % lowest cluster index (behaviour of min).
    for k = 1:K
        sqr_dist(:,k) = sum(bsxfun(@minus, X, centroids(k,:)).^2, 2);
    end
    [min_dist, gamma] = min(sqr_dist, [], 2);

    % Cost of the new assignment measured against the current centroids.
    cost = sum(min_dist);

    % M step: move every centroid to the mean of its members.
    for k = 1:K
        members = (gamma == k);
        if any(members)
            centroids(k,:) = mean(X(members,:), 1);
        else
            % Empty cluster (e.g. duplicate rows picked as initial
            % centroids): taking the mean would yield NaN. Re-seed at the
            % point that is currently fitted worst instead.
            [unusedDist, farthest] = max(min_dist); %#ok<ASGLU>
            centroids(k,:) = X(farthest,:);
            % Do not hand the same point to a second empty cluster.
            min_dist(farthest) = 0;
        end
    end

    % Stop once the cost no longer changes.
    if abs(cost - precost) < 1e-10
        break;
    end
    precost = cost;
end
end
实验结果:
(1)真实数据
(2)不含标签的数据
(3)K-Means分类结果
实验二:图像分割
function [ segImg ] = ImageSegmentation( img, K )
%IMAGESEGMENTATION Segment an image by clustering its pixel values with K_Means.
%   segImg = ImageSegmentation(img, K)
%
%   img    - image array; size(img,1) rows, size(img,2) columns and
%            size(img,3) channels.
%   K      - number of clusters handed to K_Means.
%   segImg - double array of the same size as img in which every pixel is
%            replaced by the centroid of the cluster it was assigned to.

numRows  = size(img,1);
numCols  = size(img,2);
numChans = size(img,3);
numPixels = numRows * numCols;

% Flatten the image to one pixel per row, walking it row by row
% (pixel (i,j) lands in row (i-1)*numCols + j); values are stored as double.
pixelList = double(reshape(permute(img, [2 1 3]), numPixels, numChans));

[labels, palette] = K_Means(pixelList, K);

% Replace each pixel by its cluster centroid, then undo the flattening.
recoloured = palette(labels, :);
segImg = permute(reshape(recoloured, numCols, numRows, numChans), [2 1 3]);
end
实验结果:
K=5
实验三:图像压缩
K-Means可以把图像中所有像素的颜色聚成K类:只需用少量存储空间保存这K种代表颜色(即聚类中心),而每个像素位置只存储其所属的颜色类别号,这样可以使得图像的存储空间大大减小,从而实现图像压缩。当然,由于每个像素的原始颜色被其所属类别的聚类中心近似代替,K-Means实现的图像压缩是有损压缩。
理论推导见:《EM算法(期望最大化)——从EM算法角度理解K-Means与GMM的区别》,链接:http://blog.csdn.net/tingyue_/article/category/6850758