# -*- coding: utf-8 -*-
"""K-means style clustering of 2-D points using a Pearson-correlation distance.

For an introduction to and analysis of the algorithm, see:
http://hi.baidu.com/chenjinandy/blog/item/5c4d5344b607c3248794739e.html

Created on 2011-10-21
@author: chenjinandy

NOTE: the encoding declaration on the first line must match the encoding the
file is actually saved in, because the runtime messages printed below contain
non-ASCII text.
"""
from math import sqrt


def pearson(v1, v2):
    """Return the Pearson distance ``1 - r`` between two numeric sequences.

    ``r`` is the Pearson correlation coefficient of *v1* and *v2*, so the
    result is 0 for perfectly correlated inputs and 2 for perfectly
    anti-correlated ones (up to floating-point rounding).

    Args:
        v1: sequence of numbers.
        v2: sequence of numbers with the same length as *v1*.

    Returns:
        ``1.0 - r`` as a float, or 0.0 when the correlation is undefined
        (empty input, or either sequence has zero variance).

    Raises:
        ValueError: if the two sequences differ in length.
    """
    size = len(v1)
    if size != len(v2):
        raise ValueError("pearson() requires sequences of equal length")
    if size == 0:
        return 0.0
    # Force true division: integer inputs would otherwise be floor-divided
    # on Python 2 and silently distort the score.
    n = float(size)

    # Simple sums
    sum1 = sum(v1)
    sum2 = sum(v2)
    # Sums of the squares
    sum1_sq = sum(v * v for v in v1)
    sum2_sq = sum(v * v for v in v2)
    # Sum of the products
    p_sum = sum(a * b for a, b in zip(v1, v2))

    # Calculate r (Pearson score)
    num = p_sum - sum1 * sum2 / n
    # Clamp at zero so rounding noise can never hand sqrt() a negative value.
    var1 = max(sum1_sq - sum1 * sum1 / n, 0.0)
    var2 = max(sum2_sq - sum2 * sum2 / n, 0.0)
    den = sqrt(var1 * var2)
    if den == 0:
        return 0.0
    return 1.0 - num / den


# k-means implementation in Python
def isequal(v1, v2):
    """Return 1 if the first two coordinates of *v1* and *v2* match, else 0.

    NOTE(review): both sides are scaled by 10 before the comparison, which
    adds no tolerance; presumably rounding to one decimal place was intended.
    The comparison is kept as written -- confirm the intent before changing.
    """
    same_x = v1[0] * 10 == v2[0] * 10
    same_y = v1[1] * 10 == v2[1] * 10
    return 1 if same_x and same_y else 0


def order(dataset, k, centers=None):
    """Assign every point in *dataset* to the closest of *k* centers.

    Distance is measured with :func:`pearson`.  Ties go to the lowest
    cluster index, so a point's label never depends on the label of the
    point processed before it.

    NOTE(review): for two-component points the correlation can only be -1,
    +1 or undefined, so this distance separates points only by the sign of
    ``x - y``.  A Euclidean distance is probably what 2-D sample data calls
    for -- confirm the intended metric.

    Args:
        dataset: sequence of points.
        k: number of clusters.
        centers: sequence holding at least *k* center points.  When omitted,
            the first *k* points of *dataset* are used as centers.

    Returns:
        A list with one cluster index per point (all 0 when ``k`` is 0).

    Side effects:
        Prints each point followed by the cluster it was assigned to.
    """
    if centers is None:
        centers = dataset
    labels = []
    for point in dataset:
        best = 0
        best_dist = None
        for j in range(k):
            dist = pearson(point, centers[j])
            if best_dist is None or dist < best_dist:
                best, best_dist = j, dist
        labels.append(best)
        print(point)
        print("被归为第 %s" % best + "类")
    return labels


def newcenter(dataset, num, type, default=None):
    """Return the mean ``(x, y)`` of the points labelled *num*.

    Args:
        dataset: sequence of 2-D points.
        num: cluster index to average.
        type: per-point cluster labels, parallel to *dataset*.  (The name
            shadows the builtin but is kept for keyword-argument callers.)
        default: value returned when no point carries label *num*.

    Returns:
        A tuple of two floats, or *default* for an empty cluster (instead
        of dividing by zero).
    """
    count = 0
    sum_x = 0.0
    sum_y = 0.0
    for i, point in enumerate(dataset):
        if type[i] == num:
            sum_x += point[0]
            sum_y += point[1]
            count += 1
    if count == 0:
        return default
    return (sum_x / count, sum_y / count)


def cluster(dataset, k, max_iter=100):
    """Cluster 2-D points into *k* groups and print the result.

    The first *k* points seed the centers.  Each pass re-assigns all points
    with :func:`order` and recomputes every center with :func:`newcenter`;
    an empty cluster keeps its previous center.  Iteration stops as soon as
    a pass leaves every center unchanged (per :func:`isequal`) or after
    *max_iter* passes, whichever comes first.

    Args:
        dataset: sequence of 2-D points.
        k: number of clusters, ``0 <= k <= len(dataset)``.
        max_iter: upper bound on the number of passes, so the loop always
            terminates even if the centers never settle.

    Returns:
        The final list of cluster indices, one per point (empty when ``k``
        is 0 or no pass was run).

    Raises:
        ValueError: if *k* is negative or larger than ``len(dataset)``.

    Side effects:
        Prints the assignments made on every pass, then the members of each
        cluster, then every point of *dataset*.
    """
    if k < 0 or k > len(dataset):
        raise ValueError("k must be between 0 and len(dataset)")

    centers = [dataset[i] for i in range(k)]
    labels = []
    if k > 0:
        for _ in range(max_iter):
            labels = order(dataset, k, centers)
            # Convergence is judged per pass; a counter carried across
            # passes could overshoot k and never match it.
            converged = True
            for i in range(k):
                updated = newcenter(dataset, i, labels, centers[i])
                if not isequal(updated, centers[i]):
                    converged = False
                centers[i] = updated
            if converged:
                break

    for i in range(k):
        print("属于类别 %s" % i + "的点有如下:")
        for point, label in zip(dataset, labels):
            if label == i:
                print(point)
    for point in dataset:
        print(point)
    return labels


# Sample data used by the demo below.
v1 = [1.22, 1.33, 4.55, 6.45, 6.31, 4.12]
v2 = [1.22, 1.33, 4.55, 6.45, 6.31, 4.12]
dataset = [(1, 0), (0, 1), (1, 1), (2, 1), (1, 2), (2, 2), (3, 2),
           (6, 6), (7, 6), (8, 6), (6, 7), (7, 7), (8, 7), (9, 7),
           (7, 8), (8, 8), (9, 8), (8, 9), (9, 9)]


if __name__ == "__main__":
    # Run the demo only when executed as a script, not on import.
    print(pearson(v1, v2))
    print(dataset)
    cluster(dataset, 3)
# |
# kmeans算法及python代码参考
# 最新推荐文章于 2024-01-25 20:57:36 发布