Python K-means使用

import Pycluster as pc
import numpy as np
import sys
import matplotlib.pylab as pl


def cluster_masses(labels, n_clusters):
    """Return how many points were assigned to each cluster.

    labels      -- sequence of integer cluster indices, one per data point
    n_clusters  -- total number of clusters; indices run from 0 to
                   n_clusters - 1

    The result is an integer array of length n_clusters (or longer if a
    label exceeds n_clusters - 1).  Clusters that received no point have
    a count of 0.
    """
    return np.bincount(np.asarray(labels, dtype=int), minlength=n_clusters)


def silhouette_coefficient(distances, labels, n_clusters):
    """Return the mean silhouette coefficient of a clustering.

    distances   -- pairwise distances, indexed so that distances[j][i]
                   with j > i is the distance between points i and j.
                   Only this lower triangle is read, so both a full
                   square matrix and a ragged lower-triangular one work.
    labels      -- integer cluster index of every point
    n_clusters  -- number of clusters requested from the clustering step

    For each point, a is the mean distance to the *other* members of its
    own cluster and b is the smallest mean distance to the members of any
    other non-empty cluster; the point's coefficient is
    (b - a) / max(a, b).  A point that is alone in its cluster scores 0.

    Raises ValueError when fewer than two clusters contain points, since
    b is undefined in that case.
    """
    labels = np.asarray(labels, dtype=int)
    n_points = len(labels)
    masses = cluster_masses(labels, n_clusters)
    if np.count_nonzero(masses) < 2:
        raise ValueError(
            "the silhouette coefficient needs at least two non-empty clusters"
        )

    # dist_sums[i, k] accumulates the distance from point i to every
    # point that belongs to cluster k.
    dist_sums = np.zeros((n_points, n_clusters))
    for i in range(n_points):
        for j in range(i + 1, n_points):
            d = distances[j][i]
            dist_sums[i, labels[j]] += d
            dist_sums[j, labels[i]] += d

    total = 0.0
    for i in range(n_points):
        own = labels[i]
        if masses[own] < 2:
            # Singleton cluster: coefficient is 0 by convention.
            continue
        # The point itself contributes distance 0 to its own cluster, so
        # the average over the other members divides by (size - 1).
        a = dist_sums[i, own] / (masses[own] - 1)
        # Empty clusters are skipped; they have no mean distance.
        b = min(
            dist_sums[i, k] / masses[k]
            for k in range(n_clusters)
            if k != own and masses[k] > 0
        )
        widest = max(a, b)
        if widest > 0:
            total += (b - a) / widest
        # widest == 0 means the point coincides with all relevant
        # points; it contributes 0 rather than 0/0.
    return float(total / n_points)


def main():
    """Cluster the points of a data file and report the clustering quality.

    Command line: DATAFILE NCLUSTERS.  DATAFILE holds whitespace-separated
    x and y coordinates in its first two columns.  Prints the number of
    clusters followed by the mean silhouette coefficient, then shows a
    scatter plot of the points (blue) and the cluster centroids (red).
    """
    if len(sys.argv) < 3:
        sys.exit("usage: %s DATAFILE NCLUSTERS" % sys.argv[0])
    filename, n = sys.argv[1], int(sys.argv[2])

    # x and y coordinates, whitespace-separated
    data = np.loadtxt(filename, usecols=(0, 1))

    # Run k-means.  nclusters is the number of clusters and npass the
    # number of times the algorithm is repeated.  The first element of the
    # result holds the cluster index assigned to every input point.  No
    # distance function is passed, so the library default is used.
    clustermap = pc.kcluster(data, nclusters=n, npass=50)[0]

    # Centroid coordinates of every cluster
    centroids = pc.clustercentroids(data, clusterid=clustermap)[0]
    # Pairwise distance matrix of all points
    m = pc.distancematrix(data)

    # Print overall silhouette coefficient
    print("%s %s" % (n, silhouette_coefficient(m, clustermap, n)))

    x = [point[0] for point in data]
    y = [point[1] for point in data]
    xcenter = [center[0] for center in centroids]
    ycenter = [center[1] for center in centroids]

    # NOTE(review): the fixed 0..11 axis range appears chosen for the
    # sample data set; points outside that range will not be visible.
    pl.xlim(0, 11)
    pl.ylim(0, 11)
    pl.plot(x, y, 'o')
    pl.plot(xcenter, ycenter, 'or')
    pl.show()


if __name__ == "__main__":
    main()


数据

1 2.6
2 1
2 1.5
3 4
2.7 3.5
2.4 3.2
5.5 9.2
6 9
5.8 9
3 2
1 2.8
2 1.6
7 8
7.3 8.2
6.9 8.5


  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值