# Python implementation of the hierarchical (agglomerative) clustering algorithm

from numpy import *
from math import *
from operator import *
def dist(a, b):
    """Return the Euclidean distance between two points.

    Args:
        a: Coordinates of the first point. Any array-like of numbers is
            accepted: a list, an ndarray, or a 1 x n numpy matrix.
        b: Coordinates of the second point, with the same shape as ``a``.

    Returns:
        The distance as a float.
    """
    # Work on plain ndarrays so squaring is always element-wise. The matrix
    # product ``d * d.T`` is a sum of squares only for 1 x n matrices; for
    # 2-D ndarrays it broadcasts to an n x n table and gives a wrong result.
    delta = asarray(a, dtype=float) - asarray(b, dtype=float)
    return float(sqrt((delta * delta).sum()))

def centroid(a):
    """Return the centroid (column-wise mean) of a set of points.

    Args:
        a: The points, one per row. May be a numpy matrix, an ndarray or a
            nested list of numbers.

    Returns:
        The mean taken along axis 0. For a numpy matrix this is a 1 x n
        matrix; for a 2-D ndarray or a nested list it is a 1-D ndarray.
    """
    # The free function accepts any array-like, whereas calling ``a.mean``
    # directly requires ``a`` to already be an array or matrix object.
    return mean(a, axis=0)

def resolveList(List):
    """Flatten a nested list of points into a flat list of points.

    A "point" is any item whose first element is not a list. For example,
    ``[[1, 2], [[1, 2], [1, 2]]]`` becomes ``[[1, 2], [1, 2], [1, 2]]``.

    Args:
        List: A list whose items are either points or (arbitrarily nested)
            lists of points.

    Returns:
        A new flat list containing the points in their original order. The
        point objects themselves are reused, not copied.
    """
    flat = []
    for item in List:
        if len(item) == 0:
            # An empty group contributes no points; indexing item[0] on it
            # would raise IndexError.
            continue
        if isinstance(item[0], list):
            # The item is a group of points (or of groups): flatten it.
            flat.extend(resolveList(item))
        else:
            flat.append(item)
    return flat

def cluster(data, num):
    """Merge the closest clusters step by step until ``num`` clusters remain.

    Each round computes the centroid of every current cluster, merges the
    two clusters whose centroids are closest (Euclidean distance), and
    prints the resulting list. When several pairs are equally close, the
    pair found first in ``(i, j)`` index order is merged.

    Args:
        data: List of points, each a list of coordinates. It is modified in
            place: merged items are removed and the merged cluster (a flat
            list of points) is appended at the end.
        num: Number of clusters to stop at; must be at least 1.

    Returns:
        The same list object as ``data``, after merging.

    Raises:
        ValueError: If ``num`` is smaller than 1.
    """
    if num < 1:
        # Fewer than one cluster is impossible; fail before touching data.
        raise ValueError("num must be at least 1, got %r" % (num,))

    # Deliberate alias: callers observe the result through ``data`` itself.
    dataList = data
    while len(dataList) > num:
        # An item is either a single point or a list of points, so promote
        # it to 2-D before averaging. Centroids are computed once per round.
        centers = [centroid(atleast_2d(asarray(item, dtype=float)))
                   for item in dataList]

        # Track the closest pair directly instead of sorting every distance.
        bestDist = None
        bestPair = None
        for i in range(len(centers) - 1):
            for j in range(i + 1, len(centers)):
                d = dist(centers[i], centers[j])
                if bestDist is None or d < bestDist:
                    bestDist = d
                    bestPair = (i, j)

        m, n = bestPair
        merged = resolveList([dataList[m], dataList[n]])
        # n > m, so delete the higher index first to keep m valid.
        del dataList[n]
        del dataList[m]
        dataList.append(merged)
        print(dataList)  # show the clusters after every merge
    return dataList

# Sample 2-D points. cluster() merges them in place down to two clusters,
# printing the list after every merge.
data = [
    [0.4005, 0.5306],
    [0.2148, 0.3854],
    [0.3457, 0.3156],
    [0.2652, 0.1875],
    [0.0789, 0.4139],
    [0.4548, 0.3022],
]
cluster(data, 2)

# Output printed by cluster(data, 2) above, one line per merge step:
# [[0.4005, 0.5306], [0.2148, 0.3854], [0.2652, 0.1875], [0.0789, 0.4139], [[0.3457, 0.3156], [0.4548, 0.3022]]]
# [[0.4005, 0.5306], [0.2652, 0.1875], [[0.3457, 0.3156], [0.4548, 0.3022]], [[0.2148, 0.3854], [0.0789, 0.4139]]]
# [[0.4005, 0.5306], [[0.2148, 0.3854], [0.0789, 0.4139]], [[0.2652, 0.1875], [0.3457, 0.3156], [0.4548, 0.3022]]]
# [[0.4005, 0.5306], [[0.2148, 0.3854], [0.0789, 0.4139], [0.2652, 0.1875], [0.3457, 0.3156], [0.4548, 0.3022]]]

05-15 8012

12-09 2万

12-01 6504

01-05 1089

11-13 6088

03-08 360

06-01 61

10-14 3355

02-14 243

09-19 631