/****************************************************************************
*                                                                           *
*                                  KMEANS                                   *
*                                                                           *
*****************************************************************************/
6
7 #include
8 #include
9 #include
10 #include
11 #include
12
//FUNCTION PROTOTYPES
15
//DEFINES
// Return codes (LoadPatterns returns one of these).
#define SUCCESS    1
#define FAILURE    0
// Integer booleans (used for the K-means convergence flag).
#define TRUE       1
#define FALSE      0
// Capacities of the fixed-size arrays used by the clustering structures.
#define MAXVECTDIM 20   // size of each Center[] array
#define MAXPATTERN 20   // rows in the pattern table / entries in a Member[] list
#define MAXCLUSTER 10   // entries in the cluster table
24
25
26
27
28
// Format x as fixed-point text with `width` digits after the decimal point.
//
// The text starts with '-' for negative values and with a single space
// otherwise. Values whose digit string starts after the decimal point get no
// leading zero before the '.', e.g. " .500".
//
// Returns a pointer to a static buffer that is overwritten by the next call,
// so the result must be used or copied before calling f2a again. (Previously
// the function returned the address of a local array, which is invalid once
// the function has returned.)
//
// NOTE(review): the bodies of the two digit loops were lost in the reviewed
// text; they are reconstructed from the surrounding statements (copy the d
// integer digits after the sign; pad -d zeros after the point) - confirm.
char *f2a(double x, int width) {
    static char cbuf[512];
    int d = 0;   // decimal-point position reported by fcvt
    int s = 0;   // non-zero when x is negative
    const char *cp = fcvt(x, width, &d, &s);
    if (cp == NULL) {
        cp = "";
    }
    const char sign = s ? '-' : ' ';

    if (d > 0) {
        // d digits belong before the decimal point; never read past the digit string.
        int whole = d;
        const size_t ndigits = strlen(cp);
        if (static_cast<size_t>(whole) > ndigits) {
            whole = static_cast<int>(ndigits);
        }
        snprintf(cbuf, sizeof(cbuf), "%c%.*s.%s", sign, whole, cp, cp + whole);
    } else if (d == 0) {
        snprintf(cbuf, sizeof(cbuf), "%c.%s", sign, cp);
    } else {
        // -d zeros sit between the decimal point and the first digit;
        // "%0*d" with value 0 prints exactly that many zeros.
        snprintf(cbuf, sizeof(cbuf), "%c.%0*d%s", sign, -d, 0, cp);
    }
    return cbuf;
}
67
68
69
//***** Defined structures & classes *****
71 structaCluster {72 doubleCenter[MAXVECTDIM];73 int Member[MAXPATTERN]; //Index of Vectors belonging to this cluster
74 intNumMembers;75 };76
77 structaVector {78 doubleCenter[MAXVECTDIM];79 intSize;80 };81
82 classSystem {83 private:84 double Pattern[MAXPATTERN][MAXVECTDIM+1];85 aCluster Cluster[MAXCLUSTER];86 int NumPatterns; //Number of patterns
87 int SizeVector; //Number of dimensions in vector
88 int NumClusters; //Number of clusters
89 void DistributeSamples(); //Step 2 of K-means algorithm
90 int CalcNewClustCenters();//Step 3 of K-means algorithm
91 double EucNorm(int, int); //Calc Euclidean norm vector
92 int FindClosestCluster(int); //ret indx of clust closest to pattern93 //whose index is arg
94 public:95 voidsystem();96 int LoadPatterns(char *fname); //Get pattern data to be clustered
97 void InitClusters(); //Step 1 of K-means algorithm
98 void RunKMeans(); //Overall control K-means process
99 void ShowClusters(); //Show results on screen
100 void SaveClusters(char *fname); //Save results to file
101 voidShowCenters();102 };103 //输出聚类中心
104 voidSystem::ShowCenters(){105 inti,j;106 printf("Cluster centers:\n");107 for (i=0; i
111 printf("\n");112 getchar();113 }114
115 //读取文件
116 int System::LoadPatterns(char *fname)117 {118 FILE *InFilePtr;119 inti,j;120 doublex;121 if((InFilePtr = fopen(fname, "r")) ==NULL)122 returnFAILURE;123 fscanf(InFilePtr, "%d", &NumPatterns); //Read # of patterns 18数据量
124 fscanf(InFilePtr, "%d", &SizeVector); //Read dimension of vector 2维度
125 fscanf(InFilePtr, "%d", &NumClusters); //Read # of clusters for K-Means 2簇
126 for (i=0; i
127 for (j=0; j
128 fscanf(InFilePtr,"%lg",&x); //consisting of all elements
129 Pattern[i][j]=x;130 } /*endfor*/
131 } /*endfor*/
132 //输出所有数据元素
133 printf("Input patterns:\n");134 for (i=0; i
137 printf("\n--------------------\n");138 getchar();139 returnSUCCESS;140 }141 //***************************************************************************142 //InitClusters *143 //Arbitrarily assign a vector to each of the K clusters *144 //We choose the first K vectors to do this *145 //***************************************************************************146 //初始化聚类中心
147 voidSystem::InitClusters(){148 inti,j;149 printf("Initial cluster centers:\n");150 for (i=0; i
155 } /*endfor*/
156 for (i=0; i
158 } /*endfor*/
159 printf("\n");160 getchar();161 }162 //运行KMeans
163 voidSystem::RunKMeans(){164 intconverged;165 intpass;166 pass=1;167 converged=FALSE;168 //第N次聚类
169 while (converged==FALSE) {170 printf("PASS=%d\n",pass++);171 DistributeSamples();172 converged=CalcNewClustCenters();173 ShowCenters();174 getchar();175 } /*endwhile*/
176 }177 //在二维和三维空间中的欧式距离的就是两点之间的距离,二维的公式是178 //d = sqrt((x1-x2)^+(y1-y2)^)179 //通过这种运算,就可以把所有列的属性都纳入进来
180 double System::EucNorm(int p, int c){ //Calc Euclidean norm of vector difference
181 double dist,x; //between pattern vector, p, and cluster
182 int i; //center, c.
183 char zout[128];184 char znum[40];185 char *pnum;186 //187 pnum=&znum[0];188 strcpy(zout,"d=sqrt(");189 printf("The distance from pattern %d to cluster %d is calculated as:\n",p,c);190 dist=0;191 for (i=0; i
193 x=(Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]);194 strcat(zout,f2a(x,4));195 if (i==0)196 strcat(zout,"+");197 //计算距离
198 dist += (Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]);199 } /*endfor*/
200 printf("%s)\n",zout);201 returndist;202 }203 //查找最近的群集
204 int System::FindClosestCluster(intpat){205 inti, ClustID;206 doubleMinDist, d;207 MinDist =9.9e+99;208 ClustID=-1;209 for (i=0; i
216 } /*endfor*/
217 if (ClustID<0) {218 printf("Aaargh");219 exit(0);220 } /*endif*/
221 returnClustID;222 }223 //224 voidSystem::DistributeSamples(){225 inti,pat,Clustid,MemberIndex;226 //Clear membership list for all current clusters
227 for (i=0; i
232 Clustid= FindClosestCluster(pat);//查找最近的聚类中心
233 printf("patern %d assigned to cluster %d\n\n",pat,Clustid);234 //post this pattern to the cluster
235 MemberIndex=Cluster[Clustid].NumMembers;236 Cluster[Clustid].Member[MemberIndex]=pat;237 Cluster[Clustid].NumMembers++;238 } /*endfor*/
239 }240 //计算新的群集中心
241 intSystem::CalcNewClustCenters(){242 intConvFlag,VectID,i,j,k;243 doubletmp[MAXVECTDIM];244 char xs[255];245 char ys[255];246 char nc1[20];247 char nc2[20];248 char *pnc1;249 char *pnc2;250 char *fpv;251
252 pnc1=&nc1[0];253 pnc2=&nc2[0];254 ConvFlag=TRUE;255 printf("The new cluster centers are now calculated as:\n");256 for (i=0; i
257 pnc1=itoa(Cluster[i].NumMembers,nc1,10);258 pnc2=itoa(i,nc2,10);259 strcpy(xs,"Cluster Center");260 strcat(xs,nc2);261 strcat(xs,"(1/");262 strcpy(ys,"(1/");263 strcat(xs,nc1);264 strcat(ys,nc1);265 strcat(xs,")(");266 strcat(ys,")(");267 for (j=0; j
268 tmp[j]=0.0;269 } /*endfor*/
270 for (j=0; j
271 VectID=Cluster[i].Member[j];272 for (k=0; k
273 tmp[k] += Pattern[VectID][k]; //add (member) pattern elmnt into temp
274 if (k==0) {275 strcat(xs,f2a(Pattern[VectID][k],3));276 } else{277 strcat(ys,f2a(Pattern[VectID][k],3));278 } /*endif*/
279 } /*endfor*/
280 if(j
289 for (k=0; k
290 tmp[k]=tmp[k]/Cluster[i].NumMembers;291 if (tmp[k] !=Cluster[i].Center[k])292 ConvFlag=FALSE;293 Cluster[i].Center[k]=tmp[k];294 } /*endfor*/
295 printf("%s,\n",xs);296 printf("%s\n",ys);297 } /*endfor*/
298 returnConvFlag;299 }300 //输出聚类
301 voidSystem::ShowClusters(){302 intcl;303 for (cl=0; cl[%f,%f]\n", cl,Cluster[cl].Center[0],Cluster[cl].Center[1]);305 } /*endfor*/
306 }307
308 void System::SaveClusters(char *fname){309 }