这里只给出 k-最近邻(k-Nearest Neighbor)聚类的实现过程,算法原理请自行查阅。代码如下:
k-最近邻聚类算法的主程序:
% k-nearest-neighbour clustering driver script.
% Loads 2-D points from a text file, connects every point to its k nearest
% neighbours, removes duplicated connections and plots the remaining edges.
clc;
clear;

% Read the data file and build the point matrix.
dataFile = 'D:\matlabFile\KNN\KNN.txt';
fileID = fopen(dataFile);
if fileID == -1
    % fopen returns -1 on failure; stop here with a clear message instead
    % of letting textscan fail on an invalid file identifier.
    error('KNN:fileNotFound', 'Cannot open data file: %s', dataFile);
end
C = textscan(fileID,'%f %f');
fclose(fileID);

% Display the parsed cell array (debugging aid).
%celldisp(C);

% Convert the cell array to a matrix. The data is assumed to have exactly
% two attributes per row, interpreted as 2-D coordinates.
CC_init = cat(2,C{1},C{2});   % keeps the values as originally loaded
CC = CC_init;

% Key parameter: number of neighbours per point.
k = 3;

% Every point needs k *other* points, so at least k+1 points are required.
numPoints = size(CC,1);
if numPoints < k+1
    error('KNN:notEnoughPoints', ...
        'Need at least %d points for k = %d, but only %d were loaded.', ...
        k+1, k, numPoints);
end

% edges_all: flat list of all edges, one 2x2 page per edge
%            (rows = the two end points, columns = x and y).
% edges:     4-D array, edges(:,:,j,i) is the edge from point i to its
%            j-th nearest neighbour.
edges_all = zeros(2,2,numPoints*k);
edges = zeros(2,2,k,numPoints);
count = 1;   % index of the next free page in edges_all

for i = 1:numPoints
    % Extract one point and build the candidate set without it.
    p = CC(i,:);
    CC_temp = CC;
    CC_temp(i,:) = [];

    % Find the k nearest points.
    points = FindNearNeighbor(k,p,CC_temp);

    % Turn each (point, neighbour) pair into an edge. Sorting the two end
    % points gives every edge a canonical orientation, so the same edge
    % found from either end point compares equal later on.
    for j = 1:size(points,1)
        edge = sortrows([p;points(j,:)]);
        edges(:,:,j,i) = edge;
        edges_all(:,:,count) = edge;
        count = count+1;
    end
end
% Drop any unused preallocated pages.
edges_all = edges_all(:,:,1:count-1);

% Delete duplicated edges, keeping the first occurrence of each one.
% Each 2x2 edge is flattened into one row [x1 x2 y1 y2] so that
% unique(...,'rows') can compare whole edges; 'stable' preserves the
% original order of the edges that are kept.
edge_rows = reshape(edges_all,4,[]).';
[~,keep] = unique(edge_rows,'rows','stable');
edges_all = edges_all(:,:,keep);

% Plot all edges.
for i = 1:size(edges_all,3)
    edge = edges_all(:,:,i);
    plot(edge(:,1),edge(:,2));
    hold on
end
FindNearNeighbor函数的实现如下:
function result=FindNearNeighbor(k,p,points)
%FINDNEARNEIGHBOR Return the k points closest to p.
%   result = FindNearNeighbor(k,p,points) computes the Euclidean distance
%   from the row vector p (1-by-D) to every row of points (N-by-D) and
%   returns the k nearest rows of points, ordered from nearest to farthest.
%   Points at equal distance keep their original relative order.
%
%   If fewer than k candidate points are available, all of them are
%   returned (sorted by distance); an empty candidate set yields an empty
%   result.

% Euclidean distance from p to every candidate, over all coordinates.
% bsxfun is used so that the code also runs on releases without implicit
% expansion.
diffs = bsxfun(@minus,points,p);
dis = sqrt(sum(diffs.^2,2));

% Sort by distance; sort is stable, so ties keep their input order.
[~,order] = sort(dis);

% Never ask for more neighbours than there are candidates.
num_keep = min(k,size(points,1));

% Return the selected points.
result = points(order(1:num_keep),:);
end
KNN.txt 实验数据如下(每行为一个点的 x、y 坐标,以空格分隔)。请复制后保存为 txt 文件,并放在主程序读取的路径 D:\matlabFile\KNN\KNN.txt 下(或相应修改主程序中 fopen 的路径):
32 32
35 43
42 39
31 36
29 49
27 39
46 33
44 40
41 36
37 37
66 66
59 65
57 60
63 59
60 60
55 65
51 67
62 68
61 65
57 64
120 120
115 124
124 119
109 125
104 109
111 117
125 130
123 109
129 110
107 117