利用所学的 K-means 聚类分析方法,对 Iris 数据集进行聚类分析,并利用已知的样本类别标签对聚类结果进行评价。
本代码从四维到一维逐次降维进行分析,注释较为详细,修改文件路径后即可直接使用;但在确保通俗易懂的前提下,代码难免冗长。
全部代码如下:
% Load the Iris data file: four numeric feature columns followed by a text
% class label, comma-separated. Adjust dataFile to wherever iris.data lives.
dataFile = 'D:\iris.data';   % drive letter, colon, then backslash
[attrib1,attrib2,attrib3,attrib4,attrib5] = textread(dataFile,'%f%f%f%f%s','delimiter',',');
% 'delimiter' names the field separator; it is skipped automatically while reading.
attrib = [attrib1,attrib2,attrib3,attrib4];   % one row per sample, one column per feature

% Encode the class label of every sample as 1 / 2 / 3 in a row vector.
isSetosa     = strcmp(attrib5, 'Iris-setosa');       % strcmp works element-wise on the cell array
isVersicolor = strcmp(attrib5, 'Iris-versicolor');
attrib_ = 3 * ones(1, numel(attrib5));   % any label that is not setosa/versicolor is class 3
attrib_(isSetosa)     = 1;
attrib_(isVersicolor) = 2;

% True number of samples in each class. These must be real counts: a statement
% such as "n1=+1" assigns the value +1, it does not increment.
n1 = sum(isSetosa);
n2 = sum(isVersicolor);
n3 = numel(attrib5) - n1 - n2;
%% ------------------------------ 4-D: K-means on all four features ------------------------------
% Reads from the workspace: attrib (samples x 4 features) and attrib_ (true
% class index per sample, row vector).
% Implementation notes:
%   * Convergence is tested with isequal(). "if (lresult~=kresult)" would be
%     true only when EVERY element differs, so it stops as soon as one centre
%     coordinate happens to repeat.
%   * A cluster that loses all its samples keeps its previous centre instead of
%     becoming NaN (0/0).
%   * Each sample goes to its nearest centre (the first one on a tie).
X = attrib(:, 1:4);                       % data used in this run
nSamples = size(X, 1);
kresult = [5.1 3.5 1.4 0.2;7.0 3.2 4.7 1.4;6.3 3.3 6.0 2.5];   % initial cluster centres, one row per cluster
lresult = kresult;                        % centres of the previous iteration
k_class = zeros(1, nSamples);             % cluster index assigned to each sample (row vector)
for iter = 1:1000                         % hard cap so a non-converging run cannot hang
    % Squared Euclidean distance from every sample to every centre.
    dist2 = zeros(nSamples, 3);
    for c = 1:3
        dist2(:, c) = sum(bsxfun(@minus, X, kresult(c, :)).^2, 2);
    end
    [~, nearest] = min(dist2, [], 2);
    k_class = nearest';
    % Move every non-empty cluster centre to the mean of its members.
    for c = 1:3
        members = (k_class == c);
        if any(members)
            kresult(c, :) = mean(X(members, :), 1);
        end
    end
    if isequal(kresult, lresult)          % centres unchanged -> converged
        break;
    end
    lresult = kresult;
end
N1 = sum(k_class == 1); N2 = sum(k_class == 2); N3 = sum(k_class == 3);   % final cluster sizes

% Accuracy: share of samples whose cluster index equals the true class index.
rsumpoint = sum(k_class == attrib_);
accuracy4 = rsumpoint / nSamples;         % 4-D accuracy
fprintf('四维聚类精度 = %f\n', accuracy4);
%% ------------------------------ 3-D: K-means on features 1, 2, 3 ------------------------------
% Reads from the workspace: attrib, attrib1..attrib3 and attrib_ (true class
% index per sample, row vector).
% Implementation notes:
%   * Convergence is tested with isequal(). "if (lresult~=kresult)" would be
%     true only when EVERY element differs and therefore stops too early.
%   * An empty cluster keeps its previous centre instead of becoming NaN.
%   * Each sample goes to its nearest centre (the first one on a tie).
X = attrib(:, [1 2 3]);                   % data used in this run
nSamples = size(X, 1);
kresult = [5.1 3.5 1.4;7.0 3.2 4.7;6.3 3.3 6.0];   % initial cluster centres, one row per cluster
lresult = kresult;                        % centres of the previous iteration
k_class = zeros(1, nSamples);             % cluster index assigned to each sample (row vector)
for iter = 1:1000                         % hard cap so a non-converging run cannot hang
    % Squared Euclidean distance from every sample to every centre.
    dist2 = zeros(nSamples, 3);
    for c = 1:3
        dist2(:, c) = sum(bsxfun(@minus, X, kresult(c, :)).^2, 2);
    end
    [~, nearest] = min(dist2, [], 2);
    k_class = nearest';
    % Move every non-empty cluster centre to the mean of its members.
    for c = 1:3
        members = (k_class == c);
        if any(members)
            kresult(c, :) = mean(X(members, :), 1);
        end
    end
    if isequal(kresult, lresult)          % centres unchanged -> converged
        break;
    end
    lresult = kresult;
end
N1 = sum(k_class == 1); N2 = sum(k_class == 2); N3 = sum(k_class == 3);   % final cluster sizes

% Scatter plot of the three clusters (red / green / blue).
figure(1)
subplot(2,2,1)
plot3(attrib1(k_class==1),attrib2(k_class==1),attrib3(k_class==1),'r.', ...
      attrib1(k_class==2),attrib2(k_class==2),attrib3(k_class==2),'g.', ...
      attrib1(k_class==3),attrib2(k_class==3),attrib3(k_class==3),'b.')
xlabel('sepal length');
ylabel('sepal width');
zlabel('petal length');

% Accuracy: share of samples whose cluster index equals the true class index.
rsumpoint = sum(k_class == attrib_);
accuracy3_123 = rsumpoint / nSamples;
fprintf('三维聚类精度(123) = %f\n', accuracy3_123);
%% ------------------------------ 3-D: K-means on features 1, 2, 4 ------------------------------
% Reads from the workspace: attrib, attrib1, attrib2, attrib4 and attrib_
% (true class index per sample, row vector). Draws into the current figure.
% Implementation notes:
%   * Convergence is tested with isequal(). "if (lresult~=kresult)" would be
%     true only when EVERY element differs and therefore stops too early.
%   * An empty cluster keeps its previous centre instead of becoming NaN.
%   * Each sample goes to its nearest centre (the first one on a tie).
X = attrib(:, [1 2 4]);                   % data used in this run
nSamples = size(X, 1);
kresult = [5.1 3.5 0.2;7.0 3.2 1.4;6.3 3.3 2.5];   % initial cluster centres, one row per cluster
lresult = kresult;                        % centres of the previous iteration
k_class = zeros(1, nSamples);             % cluster index assigned to each sample (row vector)
for iter = 1:1000                         % hard cap so a non-converging run cannot hang
    % Squared Euclidean distance from every sample to every centre.
    dist2 = zeros(nSamples, 3);
    for c = 1:3
        dist2(:, c) = sum(bsxfun(@minus, X, kresult(c, :)).^2, 2);
    end
    [~, nearest] = min(dist2, [], 2);
    k_class = nearest';
    % Move every non-empty cluster centre to the mean of its members.
    for c = 1:3
        members = (k_class == c);
        if any(members)
            kresult(c, :) = mean(X(members, :), 1);
        end
    end
    if isequal(kresult, lresult)          % centres unchanged -> converged
        break;
    end
    lresult = kresult;
end
N1 = sum(k_class == 1); N2 = sum(k_class == 2); N3 = sum(k_class == 3);   % final cluster sizes

% Scatter plot of the three clusters (red / green / blue).
subplot(2,2,2)
plot3(attrib1(k_class==1),attrib2(k_class==1),attrib4(k_class==1),'r.', ...
      attrib1(k_class==2),attrib2(k_class==2),attrib4(k_class==2),'g.', ...
      attrib1(k_class==3),attrib2(k_class==3),attrib4(k_class==3),'b.')
xlabel('sepal length');
ylabel('sepal width');
zlabel('petal width');

% Accuracy: share of samples whose cluster index equals the true class index.
rsumpoint = sum(k_class == attrib_);
accuracy3_124 = rsumpoint / nSamples;
fprintf('三维聚类精度(124) = %f\n', accuracy3_124);
%% ------------------------------ 3-D: K-means on features 1, 3, 4 ------------------------------
% Reads from the workspace: attrib, attrib1, attrib3, attrib4 and attrib_
% (true class index per sample, row vector). Draws into the current figure.
% Implementation notes:
%   * Convergence is tested with isequal(). "if (lresult~=kresult)" would be
%     true only when EVERY element differs and therefore stops too early.
%   * An empty cluster keeps its previous centre instead of becoming NaN.
%   * Each sample goes to its nearest centre (the first one on a tie).
X = attrib(:, [1 3 4]);                   % data used in this run
nSamples = size(X, 1);
kresult = [5.1 1.4 0.2;7.0 4.7 1.4;6.3 6.0 2.5];   % initial cluster centres, one row per cluster
lresult = kresult;                        % centres of the previous iteration
k_class = zeros(1, nSamples);             % cluster index assigned to each sample (row vector)
for iter = 1:1000                         % hard cap so a non-converging run cannot hang
    % Squared Euclidean distance from every sample to every centre.
    dist2 = zeros(nSamples, 3);
    for c = 1:3
        dist2(:, c) = sum(bsxfun(@minus, X, kresult(c, :)).^2, 2);
    end
    [~, nearest] = min(dist2, [], 2);
    k_class = nearest';
    % Move every non-empty cluster centre to the mean of its members.
    for c = 1:3
        members = (k_class == c);
        if any(members)
            kresult(c, :) = mean(X(members, :), 1);
        end
    end
    if isequal(kresult, lresult)          % centres unchanged -> converged
        break;
    end
    lresult = kresult;
end
N1 = sum(k_class == 1); N2 = sum(k_class == 2); N3 = sum(k_class == 3);   % final cluster sizes

% Scatter plot of the three clusters (red / green / blue).
subplot(2,2,3)
plot3(attrib1(k_class==1),attrib3(k_class==1),attrib4(k_class==1),'r.', ...
      attrib1(k_class==2),attrib3(k_class==2),attrib4(k_class==2),'g.', ...
      attrib1(k_class==3),attrib3(k_class==3),attrib4(k_class==3),'b.')
xlabel('sepal length');
ylabel('petal length');
zlabel('petal width');

% Accuracy: share of samples whose cluster index equals the true class index.
rsumpoint = sum(k_class == attrib_);
accuracy3_134 = rsumpoint / nSamples;
fprintf('三维聚类精度(134) = %f\n', accuracy3_134);
%% ------------------------------ 3-D: K-means on features 2, 3, 4 ------------------------------
% Reads from the workspace: attrib, attrib2..attrib4 and attrib_ (true class
% index per sample, row vector). Draws into the current figure.
% Implementation notes:
%   * Convergence is tested with isequal(). "if (lresult~=kresult)" would be
%     true only when EVERY element differs and therefore stops too early.
%   * An empty cluster keeps its previous centre instead of becoming NaN.
%   * Each sample goes to its nearest centre (the first one on a tie).
X = attrib(:, [2 3 4]);                   % data used in this run
nSamples = size(X, 1);
kresult = [3.5 1.4 0.2;3.2 4.7 1.4;3.3 6.0 2.5];   % initial cluster centres, one row per cluster
lresult = kresult;                        % centres of the previous iteration
k_class = zeros(1, nSamples);             % cluster index assigned to each sample (row vector)
for iter = 1:1000                         % hard cap so a non-converging run cannot hang
    % Squared Euclidean distance from every sample to every centre.
    dist2 = zeros(nSamples, 3);
    for c = 1:3
        dist2(:, c) = sum(bsxfun(@minus, X, kresult(c, :)).^2, 2);
    end
    [~, nearest] = min(dist2, [], 2);
    k_class = nearest';
    % Move every non-empty cluster centre to the mean of its members.
    for c = 1:3
        members = (k_class == c);
        if any(members)
            kresult(c, :) = mean(X(members, :), 1);
        end
    end
    if isequal(kresult, lresult)          % centres unchanged -> converged
        break;
    end
    lresult = kresult;
end
N1 = sum(k_class == 1); N2 = sum(k_class == 2); N3 = sum(k_class == 3);   % final cluster sizes

% Scatter plot of the three clusters (red / green / blue).
subplot(2,2,4)
plot3(attrib2(k_class==1),attrib3(k_class==1),attrib4(k_class==1),'r.', ...
      attrib2(k_class==2),attrib3(k_class==2),attrib4(k_class==2),'g.', ...
      attrib2(k_class==3),attrib3(k_class==3),attrib4(k_class==3),'b.')
xlabel('sepal width');
ylabel('petal length');
zlabel('petal width');

% Accuracy: share of samples whose cluster index equals the true class index.
rsumpoint = sum(k_class == attrib_);
accuracy3_234 = rsumpoint / nSamples;
fprintf('三维聚类精度(234) = %f\n', accuracy3_234);
%% ------------------------------ 2-D: K-means on features 1, 2 ------------------------------
% Reads from the workspace: attrib, attrib1, attrib2 and attrib_ (true class
% index per sample, row vector).
% Implementation notes:
%   * Convergence is tested with isequal(). "if (lresult~=kresult)" would be
%     true only when EVERY element differs and therefore stops too early.
%   * An empty cluster keeps its previous centre instead of becoming NaN.
%   * Each sample goes to its nearest centre (the first one on a tie).
X = attrib(:, [1 2]);                     % data used in this run
nSamples = size(X, 1);
kresult = [5.1 3.5;7.0 3.2;6.3 3.3];      % initial cluster centres, one row per cluster
lresult = kresult;                        % centres of the previous iteration
k_class = zeros(1, nSamples);             % cluster index assigned to each sample (row vector)
for iter = 1:1000                         % hard cap so a non-converging run cannot hang
    % Squared Euclidean distance from every sample to every centre.
    dist2 = zeros(nSamples, 3);
    for c = 1:3
        dist2(:, c) = sum(bsxfun(@minus, X, kresult(c, :)).^2, 2);
    end
    [~, nearest] = min(dist2, [], 2);
    k_class = nearest';
    % Move every non-empty cluster centre to the mean of its members.
    for c = 1:3
        members = (k_class == c);
        if any(members)
            kresult(c, :) = mean(X(members, :), 1);
        end
    end
    if isequal(kresult, lresult)          % centres unchanged -> converged
        break;
    end
    lresult = kresult;
end
N1 = sum(k_class == 1); N2 = sum(k_class == 2); N3 = sum(k_class == 3);   % final cluster sizes

% Scatter plot of the three clusters (red / green / blue).
figure(2)
subplot(2,3,1)
plot(attrib1(k_class==1),attrib2(k_class==1),'r.', ...
     attrib1(k_class==2),attrib2(k_class==2),'g.', ...
     attrib1(k_class==3),attrib2(k_class==3),'b.');
xlabel('sepal length');
ylabel('sepal width');

% Accuracy: share of samples whose cluster index equals the true class index.
rsumpoint = sum(k_class == attrib_);
accuracy2_12 = rsumpoint / nSamples;
fprintf('二维聚类精度(12) = %f\n', accuracy2_12);
%% ------------------------------ 2-D: K-means on features 1, 3 ------------------------------
% Reads from the workspace: attrib, attrib1, attrib3 and attrib_ (true class
% index per sample, row vector). Draws into the current figure.
% Implementation notes:
%   * Convergence is tested with isequal(). "if (lresult~=kresult)" would be
%     true only when EVERY element differs and therefore stops too early.
%   * An empty cluster keeps its previous centre instead of becoming NaN.
%   * Each sample goes to its nearest centre (the first one on a tie).
X = attrib(:, [1 3]);                     % data used in this run
nSamples = size(X, 1);
kresult = [5.1 1.4;7.0 4.7;6.3 6.0];      % initial cluster centres, one row per cluster
lresult = kresult;                        % centres of the previous iteration
k_class = zeros(1, nSamples);             % cluster index assigned to each sample (row vector)
for iter = 1:1000                         % hard cap so a non-converging run cannot hang
    % Squared Euclidean distance from every sample to every centre.
    dist2 = zeros(nSamples, 3);
    for c = 1:3
        dist2(:, c) = sum(bsxfun(@minus, X, kresult(c, :)).^2, 2);
    end
    [~, nearest] = min(dist2, [], 2);
    k_class = nearest';
    % Move every non-empty cluster centre to the mean of its members.
    for c = 1:3
        members = (k_class == c);
        if any(members)
            kresult(c, :) = mean(X(members, :), 1);
        end
    end
    if isequal(kresult, lresult)          % centres unchanged -> converged
        break;
    end
    lresult = kresult;
end
N1 = sum(k_class == 1); N2 = sum(k_class == 2); N3 = sum(k_class == 3);   % final cluster sizes

% Scatter plot of the three clusters (red / green / blue).
subplot(2,3,2)
plot(attrib1(k_class==1),attrib3(k_class==1),'r.', ...
     attrib1(k_class==2),attrib3(k_class==2),'g.', ...
     attrib1(k_class==3),attrib3(k_class==3),'b.');
xlabel('sepal length');
ylabel('petal length');

% Accuracy: share of samples whose cluster index equals the true class index.
rsumpoint = sum(k_class == attrib_);
accuracy2_13 = rsumpoint / nSamples;
fprintf('二维聚类精度(13) = %f\n', accuracy2_13);
%% ------------------------------ 2-D: K-means on features 1, 4 ------------------------------
% Reads from the workspace: attrib, attrib1, attrib4 and attrib_ (true class
% index per sample, row vector). Draws into the current figure.
% Implementation notes:
%   * Convergence is tested with isequal(). "if (lresult~=kresult)" would be
%     true only when EVERY element differs and therefore stops too early.
%   * An empty cluster keeps its previous centre instead of becoming NaN.
%   * Each sample goes to its nearest centre (the first one on a tie).
X = attrib(:, [1 4]);                     % data used in this run
nSamples = size(X, 1);
kresult = [5.1 0.2;7.0 1.4;6.3 2.5];      % initial cluster centres, one row per cluster
lresult = kresult;                        % centres of the previous iteration
k_class = zeros(1, nSamples);             % cluster index assigned to each sample (row vector)
for iter = 1:1000                         % hard cap so a non-converging run cannot hang
    % Squared Euclidean distance from every sample to every centre.
    dist2 = zeros(nSamples, 3);
    for c = 1:3
        dist2(:, c) = sum(bsxfun(@minus, X, kresult(c, :)).^2, 2);
    end
    [~, nearest] = min(dist2, [], 2);
    k_class = nearest';
    % Move every non-empty cluster centre to the mean of its members.
    for c = 1:3
        members = (k_class == c);
        if any(members)
            kresult(c, :) = mean(X(members, :), 1);
        end
    end
    if isequal(kresult, lresult)          % centres unchanged -> converged
        break;
    end
    lresult = kresult;
end
N1 = sum(k_class == 1); N2 = sum(k_class == 2); N3 = sum(k_class == 3);   % final cluster sizes

% Scatter plot of the three clusters (red / green / blue).
subplot(2,3,3)
plot(attrib1(k_class==1),attrib4(k_class==1),'r.', ...
     attrib1(k_class==2),attrib4(k_class==2),'g.', ...
     attrib1(k_class==3),attrib4(k_class==3),'b.');
xlabel('sepal length');
ylabel('petal width');

% Accuracy: share of samples whose cluster index equals the true class index.
rsumpoint = sum(k_class == attrib_);
accuracy2_14 = rsumpoint / nSamples;
fprintf('二维聚类精度(14) = %f\n', accuracy2_14);
%% ------------------------------ 2-D: K-means on features 2, 4 ------------------------------
% Reads from the workspace: attrib, attrib2, attrib4 and attrib_ (true class
% index per sample, row vector). Draws into the current figure.
% Implementation notes:
%   * Convergence is tested with isequal(). "if (lresult~=kresult)" would be
%     true only when EVERY element differs and therefore stops too early.
%   * An empty cluster keeps its previous centre instead of becoming NaN.
%   * Each sample goes to its nearest centre (the first one on a tie).
X = attrib(:, [2 4]);                     % data used in this run
nSamples = size(X, 1);
kresult = [3.5 0.2;3.2 1.4;3.3 2.5];      % initial cluster centres, one row per cluster
lresult = kresult;                        % centres of the previous iteration
k_class = zeros(1, nSamples);             % cluster index assigned to each sample (row vector)
for iter = 1:1000                         % hard cap so a non-converging run cannot hang
    % Squared Euclidean distance from every sample to every centre.
    dist2 = zeros(nSamples, 3);
    for c = 1:3
        dist2(:, c) = sum(bsxfun(@minus, X, kresult(c, :)).^2, 2);
    end
    [~, nearest] = min(dist2, [], 2);
    k_class = nearest';
    % Move every non-empty cluster centre to the mean of its members.
    for c = 1:3
        members = (k_class == c);
        if any(members)
            kresult(c, :) = mean(X(members, :), 1);
        end
    end
    if isequal(kresult, lresult)          % centres unchanged -> converged
        break;
    end
    lresult = kresult;
end
N1 = sum(k_class == 1); N2 = sum(k_class == 2); N3 = sum(k_class == 3);   % final cluster sizes

% Scatter plot of the three clusters (red / green / blue).
subplot(2,3,4)
plot(attrib2(k_class==1),attrib4(k_class==1),'r.', ...
     attrib2(k_class==2),attrib4(k_class==2),'g.', ...
     attrib2(k_class==3),attrib4(k_class==3),'b.');
xlabel('sepal width');
ylabel('petal width');

% Accuracy: share of samples whose cluster index equals the true class index.
rsumpoint = sum(k_class == attrib_);
accuracy2_24 = rsumpoint / nSamples;
fprintf('二维聚类精度(24) = %f\n', accuracy2_24);
%% ------------------------------ 2-D: K-means on features 2, 3 ------------------------------
% Reads from the workspace: attrib, attrib2, attrib3 and attrib_ (true class
% index per sample, row vector). Draws into the current figure.
% Implementation notes:
%   * Convergence is tested with isequal(). "if (lresult~=kresult)" would be
%     true only when EVERY element differs and therefore stops too early.
%   * An empty cluster keeps its previous centre instead of becoming NaN.
%   * Each sample goes to its nearest centre (the first one on a tie).
X = attrib(:, [2 3]);                     % data used in this run
nSamples = size(X, 1);
kresult = [3.5 1.4;3.2 4.7;3.3 6.0];      % initial cluster centres, one row per cluster
lresult = kresult;                        % centres of the previous iteration
k_class = zeros(1, nSamples);             % cluster index assigned to each sample (row vector)
for iter = 1:1000                         % hard cap so a non-converging run cannot hang
    % Squared Euclidean distance from every sample to every centre.
    dist2 = zeros(nSamples, 3);
    for c = 1:3
        dist2(:, c) = sum(bsxfun(@minus, X, kresult(c, :)).^2, 2);
    end
    [~, nearest] = min(dist2, [], 2);
    k_class = nearest';
    % Move every non-empty cluster centre to the mean of its members.
    for c = 1:3
        members = (k_class == c);
        if any(members)
            kresult(c, :) = mean(X(members, :), 1);
        end
    end
    if isequal(kresult, lresult)          % centres unchanged -> converged
        break;
    end
    lresult = kresult;
end
N1 = sum(k_class == 1); N2 = sum(k_class == 2); N3 = sum(k_class == 3);   % final cluster sizes

% Scatter plot of the three clusters (red / green / blue).
subplot(2,3,5)
plot(attrib2(k_class==1),attrib3(k_class==1),'r.', ...
     attrib2(k_class==2),attrib3(k_class==2),'g.', ...
     attrib2(k_class==3),attrib3(k_class==3),'b.');
xlabel('sepal width');
ylabel('petal length');

% Accuracy: share of samples whose cluster index equals the true class index.
rsumpoint = sum(k_class == attrib_);
accuracy2_23 = rsumpoint / nSamples;
fprintf('二维聚类精度(23) = %f\n', accuracy2_23);
%% ------------------------------ 2-D: K-means on features 3, 4 ------------------------------
% Reads from the workspace: attrib, attrib3, attrib4 and attrib_ (true class
% index per sample, row vector). Draws into the current figure.
% Implementation notes:
%   * Convergence is tested with isequal(). "if (lresult~=kresult)" would be
%     true only when EVERY element differs and therefore stops too early.
%   * An empty cluster keeps its previous centre instead of becoming NaN.
%   * Each sample goes to its nearest centre (the first one on a tie).
X = attrib(:, [3 4]);                     % data used in this run
nSamples = size(X, 1);
kresult = [1.4 0.2;4.7 1.4;6.0 2.5];      % initial cluster centres, one row per cluster
lresult = kresult;                        % centres of the previous iteration
k_class = zeros(1, nSamples);             % cluster index assigned to each sample (row vector)
for iter = 1:1000                         % hard cap so a non-converging run cannot hang
    % Squared Euclidean distance from every sample to every centre.
    dist2 = zeros(nSamples, 3);
    for c = 1:3
        dist2(:, c) = sum(bsxfun(@minus, X, kresult(c, :)).^2, 2);
    end
    [~, nearest] = min(dist2, [], 2);
    k_class = nearest';
    % Move every non-empty cluster centre to the mean of its members.
    for c = 1:3
        members = (k_class == c);
        if any(members)
            kresult(c, :) = mean(X(members, :), 1);
        end
    end
    if isequal(kresult, lresult)          % centres unchanged -> converged
        break;
    end
    lresult = kresult;
end
N1 = sum(k_class == 1); N2 = sum(k_class == 2); N3 = sum(k_class == 3);   % final cluster sizes

% Scatter plot of the three clusters (red / green / blue).
subplot(2,3,6)
plot(attrib3(k_class==1),attrib4(k_class==1),'r.', ...
     attrib3(k_class==2),attrib4(k_class==2),'g.', ...
     attrib3(k_class==3),attrib4(k_class==3),'b.');
xlabel('petal length');
ylabel('petal width');

% Accuracy: share of samples whose cluster index equals the true class index.
rsumpoint = sum(k_class == attrib_);
accuracy2_34 = rsumpoint / nSamples;
fprintf('二维聚类精度(34) = %f\n', accuracy2_34);
%% ------------------------------ 1-D: K-means on feature 1 ------------------------------
% Reads from the workspace: attrib, attrib1 and attrib_ (true class index per
% sample, row vector).
% Implementation notes:
%   * Convergence is tested with isequal(). "if (lresult~=kresult)" would be
%     true only when EVERY element differs and therefore stops too early.
%   * An empty cluster keeps its previous centre instead of becoming NaN.
%   * Each sample goes to its nearest centre (the first one on a tie).
X = attrib(:, 1);                         % data used in this run
nSamples = size(X, 1);
kresult = [5.1;7.0;6.3];                  % initial cluster centres, one row per cluster
lresult = kresult;                        % centres of the previous iteration
k_class = zeros(1, nSamples);             % cluster index assigned to each sample (row vector)
for iter = 1:1000                         % hard cap so a non-converging run cannot hang
    % Squared distance from every sample to every centre.
    dist2 = zeros(nSamples, 3);
    for c = 1:3
        dist2(:, c) = sum(bsxfun(@minus, X, kresult(c, :)).^2, 2);
    end
    [~, nearest] = min(dist2, [], 2);
    k_class = nearest';
    % Move every non-empty cluster centre to the mean of its members.
    for c = 1:3
        members = (k_class == c);
        if any(members)
            kresult(c, :) = mean(X(members, :), 1);
        end
    end
    if isequal(kresult, lresult)          % centres unchanged -> converged
        break;
    end
    lresult = kresult;
end
N1 = sum(k_class == 1); N2 = sum(k_class == 2); N3 = sum(k_class == 3);   % final cluster sizes

% Plot each sample's feature value against its cluster number (1, 2 or 3).
figure(3)
subplot(2,2,1)
plot(attrib1(k_class==1),1,'r.',attrib1(k_class==2),2,'g.',attrib1(k_class==3),3,'b.');
xlabel('sepal length');
ylabel('class');

% Accuracy: share of samples whose cluster index equals the true class index.
rsumpoint = sum(k_class == attrib_);
accuracy1_1 = rsumpoint / nSamples;
fprintf('一维聚类精度(1) = %f\n', accuracy1_1);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%-2
% K-means with 3 clusters on column 2 of attrib only (plotted below as 'sepal width').
% Reads : attrib, attrib2, attrib_ and the preallocated k_class from earlier in the script.
% Writes: kresult, lresult, k_class, N1..N3, sum_attrib, rsumpoint, accuracy1_2.
kresult=[3.5;3.2;3.3];% initial cluster centres
lresult=kresult;% centres of the previous iteration, used by the convergence test
while(1)% repeat assignment + centre update until the centres stop moving
    N1=0;N2=0;N3=0;% number of samples assigned to each cluster in this pass
    sum_attrib=zeros(3,1);% per-cluster sum of the attribute in this pass
    for j=1:150
        % squared distance from sample j to each of the three centres
        i1_length=(attrib(j,2)-kresult(1,1))^2;
        i2_length=(attrib(j,2)-kresult(2,1))^2;
        i3_length=(attrib(j,2)-kresult(3,1))^2;
        % strict comparisons: any tie falls through to cluster 3
        if ((i1_length<i2_length)&&(i1_length<i3_length))
            k_class(1,j)=1;
            sum_attrib(1,1)=sum_attrib(1,1)+attrib(j,2);
            N1=N1+1;
        elseif ((i2_length<i1_length)&&(i2_length<i3_length))
            k_class(1,j)=2;
            sum_attrib(2,1)=sum_attrib(2,1)+attrib(j,2);
            N2=N2+1;
        else
            k_class(1,j)=3;
            sum_attrib(3,1)=sum_attrib(3,1)+attrib(j,2);
            N3=N3+1;
        end
    end
    % Move each centre to the mean of its members. A cluster that received no
    % sample keeps its previous centre, so no 0/0 = NaN centre is produced.
    if N1>0
        kresult(1,:)=sum_attrib(1,:)/N1;
    end
    if N2>0
        kresult(2,:)=sum_attrib(2,:)/N2;
    end
    if N3>0
        kresult(3,:)=sum_attrib(3,:)/N3;
    end
    % Converged when no centre moved. isequal returns a single logical for the
    % whole vector; the former test "if (lresult~=kresult)" was true only when
    % ALL three centres had changed, so it stopped iterating as soon as any
    % single centre was unchanged.
    if isequal(lresult,kresult)
        break;
    end
    lresult=kresult;% remember these centres for the next comparison
end
subplot(2,2,2)
plot(attrib2(k_class==1),1,'r.',attrib2(k_class==2),2,'g.',attrib2(k_class==3),3,'b.');
xlabel('sepal width');
ylabel('class');
% Accuracy: cluster numbers are compared directly with the labels in attrib_.
rsumpoint=nnz(k_class(1,1:150)==attrib_(1,1:150));% number of matching samples
accuracy1_2=rsumpoint/150;
fprintf('一维聚类精度(2) = %f\n', accuracy1_2);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%-3
% K-means with 3 clusters on column 3 of attrib only (plotted below as 'petal length').
% Reads : attrib, attrib3, attrib_ and the preallocated k_class from earlier in the script.
% Writes: kresult, lresult, k_class, N1..N3, sum_attrib, rsumpoint, accuracy1_3.
kresult=[1.4;4.7;6.0];% initial cluster centres
lresult=kresult;% centres of the previous iteration, used by the convergence test
while(1)% repeat assignment + centre update until the centres stop moving
    N1=0;N2=0;N3=0;% number of samples assigned to each cluster in this pass
    sum_attrib=zeros(3,1);% per-cluster sum of the attribute in this pass
    for j=1:150
        % squared distance from sample j to each of the three centres
        i1_length=(attrib(j,3)-kresult(1,1))^2;
        i2_length=(attrib(j,3)-kresult(2,1))^2;
        i3_length=(attrib(j,3)-kresult(3,1))^2;
        % strict comparisons: any tie falls through to cluster 3
        if ((i1_length<i2_length)&&(i1_length<i3_length))
            k_class(1,j)=1;
            sum_attrib(1,1)=sum_attrib(1,1)+attrib(j,3);
            N1=N1+1;
        elseif ((i2_length<i1_length)&&(i2_length<i3_length))
            k_class(1,j)=2;
            sum_attrib(2,1)=sum_attrib(2,1)+attrib(j,3);
            N2=N2+1;
        else
            k_class(1,j)=3;
            sum_attrib(3,1)=sum_attrib(3,1)+attrib(j,3);
            N3=N3+1;
        end
    end
    % Move each centre to the mean of its members. A cluster that received no
    % sample keeps its previous centre, so no 0/0 = NaN centre is produced.
    if N1>0
        kresult(1,:)=sum_attrib(1,:)/N1;
    end
    if N2>0
        kresult(2,:)=sum_attrib(2,:)/N2;
    end
    if N3>0
        kresult(3,:)=sum_attrib(3,:)/N3;
    end
    % Converged when no centre moved. isequal returns a single logical for the
    % whole vector; the former test "if (lresult~=kresult)" was true only when
    % ALL three centres had changed, so it stopped iterating as soon as any
    % single centre was unchanged.
    if isequal(lresult,kresult)
        break;
    end
    lresult=kresult;% remember these centres for the next comparison
end
subplot(2,2,3)
plot(attrib3(k_class==1),1,'r.',attrib3(k_class==2),2,'g.',attrib3(k_class==3),3,'b.');
xlabel('petal length');
ylabel('class');
% Accuracy: cluster numbers are compared directly with the labels in attrib_.
rsumpoint=nnz(k_class(1,1:150)==attrib_(1,1:150));% number of matching samples
accuracy1_3=rsumpoint/150;
fprintf('一维聚类精度(3) = %f\n', accuracy1_3);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%-4
% K-means with 3 clusters on column 4 of attrib only (plotted below as 'petal width').
% Reads : attrib, attrib4, attrib_ and the preallocated k_class from earlier in the script.
% Writes: kresult, lresult, k_class, N1..N3, sum_attrib, rsumpoint, accuracy1_4.
kresult=[0.2;1.4;2.5];% initial cluster centres
lresult=kresult;% centres of the previous iteration, used by the convergence test
while(1)% repeat assignment + centre update until the centres stop moving
    N1=0;N2=0;N3=0;% number of samples assigned to each cluster in this pass
    sum_attrib=zeros(3,1);% per-cluster sum of the attribute in this pass
    for j=1:150
        % squared distance from sample j to each of the three centres
        i1_length=(attrib(j,4)-kresult(1,1))^2;
        i2_length=(attrib(j,4)-kresult(2,1))^2;
        i3_length=(attrib(j,4)-kresult(3,1))^2;
        % strict comparisons: any tie falls through to cluster 3
        if ((i1_length<i2_length)&&(i1_length<i3_length))
            k_class(1,j)=1;
            sum_attrib(1,1)=sum_attrib(1,1)+attrib(j,4);
            N1=N1+1;
        elseif ((i2_length<i1_length)&&(i2_length<i3_length))
            k_class(1,j)=2;
            sum_attrib(2,1)=sum_attrib(2,1)+attrib(j,4);
            N2=N2+1;
        else
            k_class(1,j)=3;
            sum_attrib(3,1)=sum_attrib(3,1)+attrib(j,4);
            N3=N3+1;
        end
    end
    % Move each centre to the mean of its members. A cluster that received no
    % sample keeps its previous centre, so no 0/0 = NaN centre is produced.
    if N1>0
        kresult(1,:)=sum_attrib(1,:)/N1;
    end
    if N2>0
        kresult(2,:)=sum_attrib(2,:)/N2;
    end
    if N3>0
        kresult(3,:)=sum_attrib(3,:)/N3;
    end
    % Converged when no centre moved. isequal returns a single logical for the
    % whole vector; the former test "if (lresult~=kresult)" was true only when
    % ALL three centres had changed, so it stopped iterating as soon as any
    % single centre was unchanged.
    if isequal(lresult,kresult)
        break;
    end
    lresult=kresult;% remember these centres for the next comparison
end
subplot(2,2,4)
plot(attrib4(k_class==1),1,'r.',attrib4(k_class==2),2,'g.',attrib4(k_class==3),3,'b.');
xlabel('petal width');
ylabel('class');
% Accuracy: cluster numbers are compared directly with the labels in attrib_.
rsumpoint=nnz(k_class(1,1:150)==attrib_(1,1:150));% number of matching samples
accuracy1_4=rsumpoint/150;
fprintf('一维聚类精度(4) = %f\n', accuracy1_4);
结果如下: