k均值例子 数据挖掘_大数据挖掘算法篇之K-Means实例

/****************************************************************************
 *                                                                          *
 *                                 KMEANS                                   *
 *                                                                          *
 ****************************************************************************/


#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// NOTE(review): the original listed five #include lines whose header names were
// stripped during extraction. stdio.h, stdlib.h and string.h are required by the
// visible code (printf/fopen, fcvt/exit/itoa, strcpy/strcat); math.h is presumed.
// Restore the fifth header if the original listing is available.

12

// FUNCTION PROTOTYPES

// DEFINES

// Status codes returned by functions that can fail.
#define SUCCESS 1
#define FAILURE 0

// Integer booleans used for the convergence flag.
#define TRUE 1
#define FALSE 0

// Fixed capacities of the statically sized arrays in this file.
#define MAXVECTDIM 20  // upper bound on coordinates per vector
#define MAXPATTERN 20  // upper bound on stored patterns / members per cluster
#define MAXCLUSTER 10  // upper bound on clusters

24

25

26

27

28

// Formats x as fixed-point text with `width` digits after the decimal point.
//
// The result starts with '-' for negative values and with a single space
// otherwise; values below 1 in magnitude have no leading zero before the
// point (e.g. 0.25 with width 3 yields " .250").
//
// Returns a pointer to a static buffer owned by this function. The text is
// overwritten by the next call, so copy or consume it before calling again.
// (The previous version returned the address of a stack array, which was
// already invalid by the time the caller read it.)
char *f2a(double x, int width) {
    static char cbuf[512];
    int decpt = 0;     // position of the decimal point relative to the digit string
    int negative = 0;  // non-zero when x is negative

    const char *digits = fcvt(x, width, &decpt, &negative);
    const char *sign = negative ? "-" : " ";

    if (decpt > 0) {
        // The first `decpt` digits form the integer part. Clamp so a short
        // digit string can never be over-read.
        const size_t ndigits = strlen(digits);
        const size_t whole =
            static_cast<size_t>(decpt) < ndigits ? static_cast<size_t>(decpt) : ndigits;
        snprintf(cbuf, sizeof cbuf, "%s%.*s.%s", sign, static_cast<int>(whole), digits,
                 digits + whole);
    } else if (decpt == 0) {
        // Pure fraction with no zeros between the point and the first digit.
        snprintf(cbuf, sizeof cbuf, "%s.%s", sign, digits);
    } else {
        // Pure fraction: -decpt zeros sit between the point and the digits.
        // "%0*d" with value 0 prints exactly that many '0' characters.
        snprintf(cbuf, sizeof cbuf, "%s.%0*d%s", sign, -decpt, 0, digits);
    }
    return cbuf;
}

67

68

69

70 //***** Defined structures & classes *****

71 structaCluster {72 doubleCenter[MAXVECTDIM];73 int Member[MAXPATTERN]; //Index of Vectors belonging to this cluster

74 intNumMembers;75 };76

77 structaVector {78 doubleCenter[MAXVECTDIM];79 intSize;80 };81

82 classSystem {83 private:84 double Pattern[MAXPATTERN][MAXVECTDIM+1];85 aCluster Cluster[MAXCLUSTER];86 int NumPatterns; //Number of patterns

87 int SizeVector; //Number of dimensions in vector

88 int NumClusters; //Number of clusters

89 void DistributeSamples(); //Step 2 of K-means algorithm

90 int CalcNewClustCenters();//Step 3 of K-means algorithm

91 double EucNorm(int, int); //Calc Euclidean norm vector

92 int FindClosestCluster(int); //ret indx of clust closest to pattern93 //whose index is arg

94 public:95 voidsystem();96 int LoadPatterns(char *fname); //Get pattern data to be clustered

97 void InitClusters(); //Step 1 of K-means algorithm

98 void RunKMeans(); //Overall control K-means process

99 void ShowClusters(); //Show results on screen

100 void SaveClusters(char *fname); //Save results to file

101 voidShowCenters();102 };103 //输出聚类中心

104 voidSystem::ShowCenters(){105 inti,j;106 printf("Cluster centers:\n");107 for (i=0; i

111 printf("\n");112 getchar();113 }114

115 //读取文件

116 int System::LoadPatterns(char *fname)117 {118 FILE *InFilePtr;119 inti,j;120 doublex;121 if((InFilePtr = fopen(fname, "r")) ==NULL)122 returnFAILURE;123 fscanf(InFilePtr, "%d", &NumPatterns); //Read # of patterns 18数据量

124 fscanf(InFilePtr, "%d", &SizeVector); //Read dimension of vector 2维度

125 fscanf(InFilePtr, "%d", &NumClusters); //Read # of clusters for K-Means 2簇

126 for (i=0; i

127 for (j=0; j

128 fscanf(InFilePtr,"%lg",&x); //consisting of all elements

129 Pattern[i][j]=x;130 } /*endfor*/

131 } /*endfor*/

132 //输出所有数据元素

133 printf("Input patterns:\n");134 for (i=0; i

137 printf("\n--------------------\n");138 getchar();139 returnSUCCESS;140 }141 //***************************************************************************142 //InitClusters *143 //Arbitrarily assign a vector to each of the K clusters *144 //We choose the first K vectors to do this *145 //***************************************************************************146 //初始化聚类中心

147 voidSystem::InitClusters(){148 inti,j;149 printf("Initial cluster centers:\n");150 for (i=0; i

155 } /*endfor*/

156 for (i=0; i

158 } /*endfor*/

159 printf("\n");160 getchar();161 }162 //运行KMeans

163 voidSystem::RunKMeans(){164 intconverged;165 intpass;166 pass=1;167 converged=FALSE;168 //第N次聚类

169 while (converged==FALSE) {170 printf("PASS=%d\n",pass++);171 DistributeSamples();172 converged=CalcNewClustCenters();173 ShowCenters();174 getchar();175 } /*endwhile*/

176 }177 //在二维和三维空间中的欧式距离的就是两点之间的距离,二维的公式是178 //d = sqrt((x1-x2)^+(y1-y2)^)179 //通过这种运算,就可以把所有列的属性都纳入进来

180 double System::EucNorm(int p, int c){ //Calc Euclidean norm of vector difference

181 double dist,x; //between pattern vector, p, and cluster

182 int i; //center, c.

183 char zout[128];184 char znum[40];185 char *pnum;186 //187 pnum=&znum[0];188 strcpy(zout,"d=sqrt(");189 printf("The distance from pattern %d to cluster %d is calculated as:\n",p,c);190 dist=0;191 for (i=0; i

193 x=(Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]);194 strcat(zout,f2a(x,4));195 if (i==0)196 strcat(zout,"+");197 //计算距离

198 dist += (Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]);199 } /*endfor*/

200 printf("%s)\n",zout);201 returndist;202 }203 //查找最近的群集

204 int System::FindClosestCluster(intpat){205 inti, ClustID;206 doubleMinDist, d;207 MinDist =9.9e+99;208 ClustID=-1;209 for (i=0; i

216 } /*endfor*/

217 if (ClustID<0) {218 printf("Aaargh");219 exit(0);220 } /*endif*/

221 returnClustID;222 }223 //224 voidSystem::DistributeSamples(){225 inti,pat,Clustid,MemberIndex;226 //Clear membership list for all current clusters

227 for (i=0; i

232 Clustid= FindClosestCluster(pat);//查找最近的聚类中心

233 printf("patern %d assigned to cluster %d\n\n",pat,Clustid);234 //post this pattern to the cluster

235 MemberIndex=Cluster[Clustid].NumMembers;236 Cluster[Clustid].Member[MemberIndex]=pat;237 Cluster[Clustid].NumMembers++;238 } /*endfor*/

239 }240 //计算新的群集中心

241 intSystem::CalcNewClustCenters(){242 intConvFlag,VectID,i,j,k;243 doubletmp[MAXVECTDIM];244 char xs[255];245 char ys[255];246 char nc1[20];247 char nc2[20];248 char *pnc1;249 char *pnc2;250 char *fpv;251

252 pnc1=&nc1[0];253 pnc2=&nc2[0];254 ConvFlag=TRUE;255 printf("The new cluster centers are now calculated as:\n");256 for (i=0; i

257 pnc1=itoa(Cluster[i].NumMembers,nc1,10);258 pnc2=itoa(i,nc2,10);259 strcpy(xs,"Cluster Center");260 strcat(xs,nc2);261 strcat(xs,"(1/");262 strcpy(ys,"(1/");263 strcat(xs,nc1);264 strcat(ys,nc1);265 strcat(xs,")(");266 strcat(ys,")(");267 for (j=0; j

268 tmp[j]=0.0;269 } /*endfor*/

270 for (j=0; j

271 VectID=Cluster[i].Member[j];272 for (k=0; k

273 tmp[k] += Pattern[VectID][k]; //add (member) pattern elmnt into temp

274 if (k==0) {275 strcat(xs,f2a(Pattern[VectID][k],3));276 } else{277 strcat(ys,f2a(Pattern[VectID][k],3));278 } /*endif*/

279 } /*endfor*/

280 if(j

289 for (k=0; k

290 tmp[k]=tmp[k]/Cluster[i].NumMembers;291 if (tmp[k] !=Cluster[i].Center[k])292 ConvFlag=FALSE;293 Cluster[i].Center[k]=tmp[k];294 } /*endfor*/

295 printf("%s,\n",xs);296 printf("%s\n",ys);297 } /*endfor*/

298 returnConvFlag;299 }300 //输出聚类

301 voidSystem::ShowClusters(){302 intcl;303 for (cl=0; cl[%f,%f]\n", cl,Cluster[cl].Center[0],Cluster[cl].Center[1]);305 } /*endfor*/

306 }307

308 void System::SaveClusters(char *fname){309 }

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值