python实现简单的kmeans聚类算法

问题描述:给定一组二维数据点,用k-means算法对其进行聚类。下面的例子以k=3(即分为3个簇)为例。

原数据(保存为与脚本同目录的data.txt,每行一个点,格式为"x,y"):

1.5,3.1
2.2,2.9
3,4
2,1
15,25
43,13
32,42
0,0
8,9
12,5
9,12
11,8
22,33
24,25

实现代码:

#coding:utf-8
from numpy import *
import string
import math

def loadDataSet(filename):
	"""Load comma-separated numeric rows from a text file.

	Args:
		filename: path of a text file holding one point per line, with the
			coordinates separated by commas (for example ``1.5,3.1``).

	Returns:
		A list of rows, each row being a list of floats in file order.

	Raises:
		IOError/OSError: if the file cannot be opened.
		ValueError: if a field on a non-blank line is not a valid number.
	"""
	dataMat = []
	# The context manager closes the handle even if parsing raises.
	with open(filename) as fr:
		for line in fr:
			line = line.strip()
			# Blank lines (typically a trailing newline at the end of the
			# file) carry no point and would otherwise fail to parse.
			if not line:
				continue
			# Builtin float() replaces string.atof, which no longer exists
			# in Python 3; both accept the same numeric text.
			dataMat.append([float(field) for field in line.split(',')])
	return dataMat

def distEclud(vecA, vecB):
	"""Return the Euclidean distance between two points.

	Only the first ``len(vecA)`` coordinates of ``vecB`` are read, so
	``vecB`` must be at least as long as ``vecA``.
	"""
	squared_total = 0.0
	for axis in range(len(vecA)):
		gap = vecA[axis] - vecB[axis]
		squared_total += power(gap, 2)
	return sqrt(squared_total)

def clusterOfElement(means, element):
	"""Return the index of the centroid in ``means`` nearest to ``element``.

	Distances are measured with ``distEclud``. A later centroid replaces
	the current best only when it is strictly closer, so ties resolve to
	the lowest index.
	"""
	nearest = 0
	shortest = distEclud(means[0], element)
	position = 1
	while position < len(means):
		candidate = distEclud(means[position], element)
		if candidate < shortest:
			shortest, nearest = candidate, position
		position += 1
	return nearest
	
def getMean(cluster):
	"""Return the centroid (per-dimension mean) of the points in one cluster.

	Args:
		cluster: a sequence of equal-length points, e.g.
			``[[1, 2], [3, 4], [5, 6]]``.

	Returns:
		A list with one float per dimension. An empty cluster yields an
		empty list instead of raising.
	"""
	num = len(cluster)	# number of points in this cluster
	if num == 0:
		return []
	dim = len(cluster[0])
	res = []
	for i in range(dim):
		# The running total must restart for every dimension; carrying it
		# over would leak the previous dimension's mean into this one.
		# Starting from a float also keeps the division below a true
		# division when the coordinates are ints.
		total = 0.0
		# Explicit loop rather than sum(): this file does
		# ``from numpy import *``, which shadows the builtin sum.
		for point in cluster:
			total = total + point[i]
		res.append(total / num)
	return res

def kMeans(k=3, filename='data.txt', init_means=None, max_iter=1000):
	"""Cluster the points stored in ``filename`` into ``k`` groups with k-means.

	The loaded data, the final clusters and the final centroids are printed.

	Args:
		k: number of clusters.
		filename: data file passed to ``loadDataSet``.
		init_means: ``k`` starting centroids. When omitted, the seeds
			``[[1.1, 1], [1, 1], [1, 2]]`` are used (valid for ``k == 3``).
			The argument itself is never modified.
		max_iter: upper bound on the number of assignment/update passes.

	Returns:
		A ``(clusters, means)`` tuple: ``clusters[i]`` is the list of points
		assigned to centroid ``means[i]``.

	Raises:
		ValueError: if the number of starting centroids differs from ``k``.
	"""
	data = loadDataSet(filename)
	# Single-argument print(...) behaves the same as a statement (Python 2)
	# and as a function (Python 3). The second space in the literal stands
	# in for the separator the old ``print a, b`` form inserted.
	print("data is  " + str(data))

	if init_means is None:
		init_means = [[1.1, 1], [1, 1], [1, 2]]
	# Work on a copy so the caller's centroids are left untouched.
	inite_mean = [list(mean) for mean in init_means]
	if len(inite_mean) != k:
		raise ValueError("expected %d initial means, got %d" % (k, len(inite_mean)))

	clusters = [[] for _ in range(k)]
	for _ in range(max_iter):
		# Assignment step: put every point in the cluster of its nearest centroid.
		clusters = [[] for _ in range(k)]
		for point in data:
			clusters[clusterOfElement(inite_mean, point)].append(point)

		# Update step: recompute each centroid from its members.
		new_means = []
		for cluster_index in range(k):
			members = clusters[cluster_index]
			if len(members) == 0:
				# An empty cluster has no mean; keep its previous centroid
				# instead of failing.
				new_means.append(inite_mean[cluster_index])
			else:
				new_means.append(list(getMean(members)))

		# Once the centroids stop moving, every further pass would repeat
		# the same assignment, so stopping here does not change the result.
		if new_means == inite_mean:
			break
		inite_mean = new_means

	print("result cluster is  " + str(clusters))
	print("result means is  " + str(inite_mean))
	return clusters, inite_mean
	
# Script entry point: run the clustering demo. The call sits at module level,
# so it executes whenever this file is run or imported.
kMeans()


  • 1
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值