# 层次聚类算法的python实现

274人阅读 评论(0)

from numpy import *
from math import *
from operator import *
def dist(a, b):  # a, b: points as numpy matrix rows, ndarrays or plain sequences
    """Return the Euclidean distance between two points of equal shape.

    Both arguments are converted to plain float ndarrays first, so the
    squared differences are always taken element by element. The earlier
    ``(a-b)*(a-b).T`` form was only a squared norm for ``numpy.matrix``
    rows; a 1 x N ndarray row broadcast to an outer product instead.

    Parameters:
        a, b: coordinates of the two points (matrix row, ndarray, list or
            tuple of numbers).

    Returns:
        The distance as a float.
    """
    diff = asarray(a, dtype=float) - asarray(b, dtype=float)
    # Use the ndarray method instead of a bare ``sum`` so the result does not
    # depend on which star import currently owns that name.
    return sqrt((diff * diff).sum())

def centroid(a):  # a: one point per row (numpy matrix, ndarray or nested list)
    """Return the column-wise mean of the rows of ``a``.

    ``asanyarray`` leaves ``numpy.matrix`` and ndarray inputs untouched, so
    they yield exactly what ``a.mean(0)`` gave before (a 1 x d matrix for a
    matrix input). Plain nested lists are now accepted as well and yield a
    1-D ndarray.

    Parameters:
        a: the points of one group, one point per row.

    Returns:
        The mean point of the group.
    """
    return asanyarray(a).mean(0)

def resolveList(List):  # e.g. [[1,2],[[1,2],[1,2]]] -> [[1,2],[1,2],[1,2]]
    """Flatten arbitrarily nested groups of points into one flat list of points.

    An entry is treated as a single point when its first element is not
    itself a list or tuple; otherwise it is a group and is flattened
    recursively. Points keep their original order and are not copied.

    Parameters:
        List: a list whose entries are points or (nested) groups of points.

    Returns:
        A new list holding every point found in ``List``.
    """
    flat = []
    for entry in List:
        if len(entry) == 0:
            # An empty group holds no points; there is no first element to inspect.
            continue
        if isinstance(entry[0], (list, tuple)):
            flat.extend(resolveList(entry))
        else:
            flat.append(entry)
    return flat

def cluster(data, num):  # data: list of points; num: number of clusters wanted
    """Bottom-up hierarchical clustering driven by centroid distance.

    While more than ``num`` groups remain, the two groups whose centroids
    are closest are merged into one flat list of points, and the current
    grouping is printed after every merge.

    Parameters:
        data: list of points (each a list of coordinates). It is modified in
            place: merged groups replace their members and are appended at
            the end, so a bare point and a group of points can sit side by
            side in the list.
        num: number of groups to stop at. Merging also stops once a single
            group is left, because there is then no pair to merge.

    Returns:
        The same list object as ``data``, holding the final grouping.
    """
    groups = data  # deliberately the caller's list: merges are applied in place
    while len(groups) > num and len(groups) > 1:
        # One centroid per group, computed once per pass. asmatrix turns a bare
        # point into a 1 x d row and a merged group into a k x d block of rows.
        centers = [centroid(asmatrix(group)) for group in groups]

        # Closest pair of centroids; on ties the first pair in (i, j) order wins.
        closest = None
        smallest = None
        for i in range(len(centers) - 1):
            for j in range(i + 1, len(centers)):
                gap = dist(centers[i], centers[j])
                if smallest is None or gap < smallest:
                    closest, smallest = (i, j), gap

        m, n = closest
        merged = resolveList([groups[m], groups[n]])
        del groups[n]  # n > m, so removing n first keeps index m valid
        del groups[m]
        groups.append(merged)
        print(groups)  # show the grouping after every merge
    return groups

# Demo run: six 2-D sample points merged down to two clusters.
data = [
    [0.4005, 0.5306],
    [0.2148, 0.3854],
    [0.3457, 0.3156],
    [0.2652, 0.1875],
    [0.0789, 0.4139],
    [0.4548, 0.3022],
]
cluster(data, 2)

运行结果（每合并一次打印一行当前的聚类情况）：
[[0.4005, 0.5306], [0.2148, 0.3854], [0.2652, 0.1875], [0.0789, 0.4139], [[0.3457, 0.3156], [0.4548, 0.3022]]]
[[0.4005, 0.5306], [0.2652, 0.1875], [[0.3457, 0.3156], [0.4548, 0.3022]], [[0.2148, 0.3854], [0.0789, 0.4139]]]
[[0.4005, 0.5306], [[0.2148, 0.3854], [0.0789, 0.4139]], [[0.2652, 0.1875], [0.3457, 0.3156], [0.4548, 0.3022]]]
[[0.4005, 0.5306], [[0.2148, 0.3854], [0.0789, 0.4139], [0.2652, 0.1875], [0.3457, 0.3156], [0.4548, 0.3022]]]

0
0

* 以上用户言论只代表其个人观点，不代表CSDN网站的观点或立场
个人资料
• 访问：4839次
• 积分：108
• 等级：
• 排名：千里之外
• 原创：5篇
• 转载：5篇
• 译文：0篇
• 评论：4条
文章存档
评论排行
最新评论