# 简单聚类 (simple clustering: k-means)

# 学习自《机器学习实战》 (adapted from the book "Machine Learning in Action")
from numpy import *
def loadDataSet(fileName):
    """Read a tab-separated text file into a list of rows.

    Each non-blank line is stripped of surrounding whitespace and split on
    tab characters.  Fields are returned as strings; numeric conversion is
    left to the caller.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A list with one entry per non-blank line, each entry being the list
        of that line's tab-separated string fields.
    """
    dataMat = []
    # The context manager guarantees the handle is closed even if a read fails.
    with open(fileName) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # A blank line would otherwise yield a [''] row and make the
                # data set ragged.
                continue
            dataMat.append(stripped.split('\t'))
    return dataMat
def distEclud(vecA, vecB):
    """Return the Euclidean distance between two vectors.

    Args:
        vecA: A 1 x n row (for example one row of a centroid matrix) or a
            1-D sequence of n values.
        vecB: A sequence of n values; numeric strings are accepted and
            converted to float.

    Returns:
        sqrt(sum((a_i - b_i) ** 2)) as a NumPy float.

    Raises:
        ValueError: If the two vectors do not have the same number of
            elements.
    """
    # Flatten both operands so a 1 x n matrix row and a 1-D array line up.
    a = asarray(vecA, dtype=float).ravel()
    b = asarray(vecB, dtype=float).ravel()
    if a.shape != b.shape:
        raise ValueError(
            "distEclud: vectors differ in length (%d vs %d)" % (a.size, b.size)
        )
    # Take the square root once, after summing the squared differences.
    # Taking it per component yields the Manhattan distance instead.
    return sqrt(((a - b) ** 2).sum())
def randCent(dataSet, k):
    """Create k random centroids inside the bounding box of the data.

    Args:
        dataSet: 2-D collection of samples (rows) by features (columns).
            Values may be numbers or numeric strings.
        k: Number of centroids to generate.

    Returns:
        A k x n matrix whose column j is drawn uniformly between the minimum
        and maximum of feature j.
    """
    # Convert to float before taking min/max: on a string array the
    # comparison is lexicographic ('10' < '9') and the bounds come out wrong.
    dataMat = array(dataSet, dtype=float)
    n = shape(dataMat)[1]
    lows = dataMat.min(axis=0)
    spans = dataMat.max(axis=0) - lows
    centriose = asmatrix(zeros((k, n)))
    # Draw one column at a time so the sequence of random numbers consumed
    # is unchanged for a given seed.
    for j in range(n):
        centriose[:, j] = lows[j] + spans[j] * random.rand(k, 1)
    return centriose
def changtofloat(dataSet):
    """Return a float matrix copy of a 2-D data set.

    Args:
        dataSet: 2-D array-like of numbers or numeric strings.

    Returns:
        A new m x n matrix holding every element converted to float; the
        input is not modified.
    """
    # A single dtype conversion replaces the element-by-element loop, and
    # array() always copies, so the result never aliases the input.  The
    # debug print of the whole matrix is dropped.
    return asmatrix(array(dataSet, dtype=float))
def kMeans(dataset,k,disMeans=distEclud,createCent=randCent):
    """Cluster the rows of ``dataset`` into k groups with k-means.

    Args:
        dataset: 2-D collection of samples (rows) by features (columns);
            values may be numbers or numeric strings.
        k: Number of clusters.
        disMeans: Callable ``(centroid_row, sample_row) -> distance``.
        createCent: Callable ``(dataset, k) -> k x n`` initial centroids.

    Returns:
        A tuple ``(cent, clusterAssment)``.  ``cent`` is the centroid
        container returned by ``createCent``, updated in place.
        ``clusterAssment`` is an m x 2 matrix holding, per sample, the index
        of its assigned cluster and the squared distance to that centroid.
    """
    # Convert to float once; the data never changes between iterations.
    dataSet = array(dataset, dtype=float)
    m = shape(dataSet)[0]
    clusterAssment = asmatrix(zeros((m, 2)))
    # Start from "unassigned" (-1) so that a first-pass assignment to
    # cluster 0 is still recognised as a change.
    clusterAssment[:, 0] = -1
    cent = createCent(dataset, k)
    clusterChanged = True
    while clusterChanged:
        clusterChanged = False
        # Assignment step: attach each sample to its nearest centroid.
        for i in range(m):
            minDict = inf
            minIndex = -1
            for j in range(k):
                distJI = disMeans(cent[j, :], dataSet[i, :])
                if distJI < minDict:
                    minDict = distJI
                    minIndex = j
            if clusterAssment[i, 0] != minIndex:
                clusterChanged = True
            clusterAssment[i, :] = minIndex, minDict ** 2
        # Update step: move each centroid to the mean of its members.
        for cnt in range(k):
            members = dataSet[nonzero(clusterAssment[:, 0].A == cnt)[0]]
            # An empty cluster keeps its previous centroid; the mean of zero
            # rows would be NaN and poison later distance comparisons.
            if len(members) > 0:
                cent[cnt, :] = members.mean(axis=0)
    return cent, clusterAssment

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值