一、 实验目的:
1、 用k-means算法实现n个对象的数据集的聚类划分
2、 体会k-means算法在聚类划分过程中的优缺点
二、 实验内容:
实验题目:设数据集S={(1,1), (2,1), (1,2), (2,2), (4,3), (5,3), (4,4), (5,4)},令k=2, 试用k-means算法将S划分为k个簇
过程代码:
#include"stdio.h"
#include"stdlib.h"
#include<math.h>
#include<iostream>
using namespace std;
// Number of data points; sizes the `point` and `center` arrays.
#define N 8
// Number of clusters; sizes the `mean` array.
// NOTE(review): this is a lower-case macro, so every identifier spelled `k`
// later in the file is textually replaced by 2 -- never declare a variable
// named `k` in this file.
#define k 2
// A 2-D sample (or cluster centre) with single-precision coordinates.
struct Point {
    float x;  // horizontal coordinate
    float y;  // vertical coordinate
};
// The N input samples to be clustered.
Point point[N]={
{1.0, 1.0}, {2.0, 1.0}, {1.0, 2.0}, {2.0, 2.0}, {4.0,3.0},
{5.0, 3.0}, {4.0, 4.0}, {5.0, 4.0}
};
// center[i] is the zero-based index of the cluster that point[i] is currently
// assigned to; it is written by cluster().
int center[N];
// mean[j] is the current centre of cluster j; valid indices are 0 .. k-1.
Point mean[k];
// Forward declarations of the k-means helpers defined after main().
// Euclidean distance between two points.
float getdistance(Point point1, Point point2);
// Assigns every point to its nearest cluster centre and prints the result.
void cluster();
// Total point-to-assigned-centre distance for the current assignment.
float gete();
// Recomputes the cluster centres from the given assignment array.
void getmean(int center[N]);
/*
 * Program entry point: runs k-means over the global `point` data set.
 *
 * The two cluster centres are seeded with point[0] and point[3]. The program
 * then alternates cluster() (assignment) and getmean() (centre update),
 * printing the error reported by gete() after every assignment pass, until
 * the error changes by no more than 0.5 between two consecutive passes.
 *
 * Returns 0 on completion.
 */
int main(){
    // Seed the centres. `mean` holds exactly k (= 2) entries, so only
    // indices 0 and 1 may be written; assigning a third seed would write
    // past the end of the array.
    mean[0].x = point[0].x;
    mean[0].y = point[0].y;
    mean[1].x = point[3].x;
    mean[1].y = point[3].y;

    int number = 0;       // how many assignment passes have been run
    float temp1, temp2;   // error of the previous / current pass

    // First assignment pass.
    cluster();
    number++;
    // Error of the first pass.
    // NOTE(review): gete() sums plain distances, not squared distances.
    temp1 = gete();
    printf("the error1 is:%f\n", temp1);

    // Recompute the cluster centres from the first assignment.
    getmean(center);

    // Second assignment pass.
    cluster();
    number++;
    temp2 = gete();
    printf("the error2 is:%f\n", temp2);

    // Keep iterating until the error difference between two consecutive
    // passes falls within the 0.5 threshold.
    while (fabs(temp1 - temp2) > 0.5) {
        temp1 = temp2;
        getmean(center);
        cluster();
        temp2 = gete();
        number++;
        printf("the error%d is:%f\n", number, temp2);
    }

    printf("the total number of cluster is:%d\n", number);
    system("pause");
    return 0;
}
// Returns the Euclidean distance between point1 and point2.
float getdistance(Point point1, Point point2){
    const float dx = point1.x - point2.x;
    const float dy = point1.y - point2.y;
    return sqrt(dx * dx + dy * dy);
}
// Assignment step: for every sample, finds the nearest entry of `mean`,
// records its index in center[] and prints the (1-based) cluster number.
void cluster(){
    for (int idx = 0; idx < N; ++idx) {
        // Sentinel upper bound; a centre only wins if it is strictly closer.
        float best = 9999.0;
        for (int c = 0; c < k; ++c) {
            const float d = getdistance(point[idx], mean[c]);
            if (d < best) {
                best = d;
                center[idx] = c;
            }
        }
        printf("(%.0f,%.0f)\t in cluster-%d\n", point[idx].x, point[idx].y, center[idx] +1);
    }
}
/*
 * Returns the clustering error of the current assignment: the sum, over all
 * N samples, of the distance from point[i] to the centre of the cluster it
 * is assigned to (mean[center[i]]).
 *
 * Relies on every center[i] being a valid cluster index in 0 .. k-1, which
 * holds for the zero-initialised array and for anything cluster() writes.
 *
 * NOTE(review): this accumulates plain Euclidean distances. If a sum of
 * *squared* errors is wanted, square each term -- confirm the intended
 * criterion before changing it, since it affects the printed values and the
 * convergence threshold used by the caller.
 */
float gete(){
    float sum = 0;
    for (int i = 0; i < N; i++) {
        // Each sample belongs to exactly one cluster, so index its centre
        // directly instead of scanning all clusters for a match.
        sum += getdistance(point[i], mean[center[i]]);
    }
    return sum;
}
/*
 * Update step: recomputes every cluster centre in the global `mean` array as
 * the average of the samples assigned to that cluster, then prints the new
 * centres.
 *
 * center: assignment array; center[j] is the zero-based cluster index of
 *         point[j] (N entries).
 *
 * A cluster with no assigned samples keeps its previous centre, which avoids
 * a division by zero that would turn the centre into NaN.
 */
void getmean(int center[N]){
    // Iterate over the k clusters only: `mean` has exactly k entries.
    for (int c = 0; c < k; c++) {
        Point sum = {0.0f, 0.0f};
        int count = 0;
        for (int j = 0; j < N; j++) {
            if (center[j] == c) {
                sum.x += point[j].x;
                sum.y += point[j].y;
                count++;
            }
        }
        if (count > 0) {
            mean[c].x = sum.x / count;
            mean[c].y = sum.y / count;
        }
    }
    for(int i=0;i<k;i++){
        printf("the new center point of %d is:\t(%f,%f)\n",i+1,mean[i].x,mean[i].y);
    }
}