本文给出层次聚类算法的 Python 实现,并使用《数据挖掘导论》中的具体数据运行验证,代码如下:
from numpy import *
from math import *
from operator import *
def dist(a, b):  # a, b: points given as numpy matrix, ndarray, or plain sequence
    """Return the Euclidean distance between points ``a`` and ``b``.

    Both inputs are flattened to 1-D float arrays first, so a 1xN matrix,
    a 1xN / N-element ndarray and a plain list of coordinates are all
    handled the same way.  (The previous ``(a-b)*(a-b).T`` form was only a
    dot product for ``numpy.matrix``; for a 2-D ndarray it broadcast to an
    outer product and produced a wrong result.)

    Returns:
        The distance as a Python ``float``.
    """
    diff = asarray(a, dtype=float).ravel() - asarray(b, dtype=float).ravel()
    # dot(diff, diff) is the squared norm; float() keeps the return type a
    # plain float regardless of which ``sqrt`` (math or numpy) is in scope.
    return float(sqrt(dot(diff, diff)))
def centroid(a):  # a: 2-D collection of points, one point per row
    """Return the column-wise mean (centroid) of the rows of ``a``.

    ``asanyarray`` passes ``numpy.matrix`` / ndarray inputs through
    unchanged, so a matrix input still yields a 1xN matrix as before, while
    a plain list of coordinate lists is now accepted and yields an ndarray.
    """
    return asanyarray(a).mean(axis=0)
def resolveList(List):
    """Flatten a nested grouping of points into a flat list of points.

    An element whose first item is not a list is treated as a single point
    (a leaf) and kept as-is; any other element is treated as a nested group
    and flattened recursively.  For example
    ``[[1,2],[[1,2],[1,2]]]`` becomes ``[[1,2],[1,2],[1,2]]``.

    An empty element is kept as a leaf instead of raising ``IndexError``.

    Args:
        List: a sequence of points and/or (arbitrarily nested) groups of
            points.

    Returns:
        A new flat list containing the leaf points in their original order.
    """
    flat = []
    for item in List:
        # Leaf test: empty, or first entry is a coordinate rather than a list.
        if not item or not isinstance(item[0], list):
            flat.append(item)
        else:
            flat.extend(resolveList(item))
    return flat
def cluster(data,num): #data is list num is the number of clusters you want
dataList=data
clusterList=range(len(dataList))
while len(dataList)>num:
dataMat=map(mat,data)
distDic={}
for i in range(len(dataMat)-1):
for j in range(len(dataMat))[i+1:]:
di