Python 实现K-means算法

最新推荐文章于 2024-08-11 16:00:02 发布

Lincolnfather

最新推荐文章于 2024-08-11 16:00:02 发布

阅读量1k

点赞数

分类专栏：机器学习算法 文章标签：python 机器学习 kmeans

本文链接：https://blog.csdn.net/Lincolnfather/article/details/16116649

版权

机器学习算法专栏收录该内容

3 篇文章 0 订阅

订阅专栏

# coding=utf-8
import os
import sys
from pylab import *
import random
import types
cny = {}
def genRandom():
    """Draw a random 3x2 array, print it, and return it.

    Uses the module-level ``rand`` made available by the file's
    ``from pylab import *``.
    NOTE(review): presumably this is ``numpy.random.rand`` (uniform samples
    in [0, 1)) and the result is meant as three random 2-D starting
    centres for ``classfy`` -- confirm.

    Returns:
        Whatever ``rand(3, 2)`` returns.
    """
    f = rand(3, 2)
    # A single pre-formatted argument prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print("random number: %s" % (f,))
    return f
def loaddata(path="D:/kmeans.txt"):
    """Read tab-separated records from a text file.

    Args:
        path: File to read. Defaults to the location this script was
            originally hard-coded to use.

    Returns:
        A list with one entry per line read. Each entry is the list of
        string fields obtained by splitting the line on tab characters;
        fields are not converted to numbers.

    Reading stops at the first blank (whitespace-only) line or at end of
    file, whichever comes first.

    NOTE(review): fields are split on tabs only, so a space-separated file
    yields a single field per line -- confirm the data file really uses tabs.
    """
    vals = []
    # ``with`` guarantees the handle is closed even if reading fails midway.
    with open(path, "r") as f:
        for line in f:
            if not line.strip():
                break
            vals.append(line.strip("\n").split("\t"))
    return vals
def euclid(list1, list2):
    """Return the Euclidean distance between two points.

    Args:
        list1: Sequence of coordinates. Every item is passed through
            ``float()``, so numeric strings are accepted.
        list2: Sequence of coordinates with at least ``len(list1)`` items;
            any extra trailing items are ignored.

    Returns:
        The square root of the summed squared coordinate differences, as
        computed by the module-level ``sqrt`` (from ``from pylab import *``).

    Raises:
        IndexError: If ``list2`` is shorter than ``list1``.
        ValueError: If a coordinate cannot be converted by ``float()``.
    """
    res = 0.0
    # The length of list1 drives the loop; list2 is indexed in step with it.
    for index, coord in enumerate(list1):
        res += (float(coord) - float(list2[index])) ** 2
    return sqrt(res)

'''
根据随机的点聚成原始的分类中心
'''
def classfy(rn, val):
    """Assign every point in ``val`` to its nearest centre in ``rn``.

    Args:
        rn: Sequence of centres; each centre is a coordinate sequence
            accepted by ``euclid``.
        val: Iterable of points; each point is a sequence of coordinate
            strings (they are joined with ``","``).

    Returns:
        A tuple ``(rn, vc)``. ``rn`` is returned unchanged. ``vc`` maps the
        1-based position of each centre in ``rn`` to the list of points
        assigned to it, each encoded as a comma-joined string such as
        ``"0.3,0.25"`` (the member format ``kmeans`` splits on). A centre
        that attracts no points maps to an empty list.

    The cluster key is the index of the closest centre. Ties go to the
    earliest centre. If ``rn`` is empty no point can be assigned and ``vc``
    is empty.

    This function does not compute updated centroids; pass ``vc`` to
    ``kmeans`` for that.
    """
    clusters = {}
    for idx in range(len(rn)):
        clusters[idx + 1] = []

    for point in val:
        nearest_key = None
        nearest_dist = None
        for idx, centre in enumerate(rn):
            dist = euclid(centre, point)
            # Strict "<" keeps the first centre when distances are equal.
            if nearest_dist is None or dist < nearest_dist:
                nearest_dist = dist
                nearest_key = idx + 1
        if nearest_key is not None:
            clusters[nearest_key].append(",".join(point))

    return rn, clusters
'''
t1 0.38,t2 0.6
'''

def canopy(vals, t1, t2):
    """Group points into canopies around randomly chosen centres.

    Repeatedly removes a random point from ``vals`` to act as a centre and
    measures every remaining point against it with ``euclid``:

    * distance <= ``t1``: the point joins the canopy and is removed from
      ``vals`` (it can no longer become a centre or join another canopy);
    * otherwise distance <= ``t2``: the point joins the canopy but stays
      in ``vals``, so it may also appear in later canopies;
    * otherwise the point is left alone.

    Args:
        vals: List of points, each a sequence of coordinate strings.
            NOTE: the list is consumed in place and is empty on return.
        t1: Tight distance threshold.
        t2: Loose distance threshold (presumably t1 <= t2 -- confirm).

    Returns:
        The module-level dict ``cny``, mapping ``",".join(centre)`` to the
        list of points in that centre's canopy. ``cny`` is not cleared
        here, so entries from earlier calls persist. A centre with no
        point within range is not recorded at all.
        NOTE(review): that drops isolated points from the result --
        confirm this is intended.

    An empty ``vals`` returns ``cny`` immediately.
    """
    while vals:
        # Pick a random remaining point as the next canopy centre.
        center1 = vals.pop(random.randint(0, len(vals) - 1))
        ccny = []
        remaining = []
        # Build the survivor list separately: removing from ``vals`` while
        # iterating over it would skip the element after each removal.
        for vl in vals:
            dis = euclid(vl, center1)
            if dis <= t1:
                ccny.append(vl)
            elif dis <= t2:
                ccny.append(vl)
                remaining.append(vl)
            else:
                remaining.append(vl)
        # Slice-assign so the caller's list object is the one being drained.
        vals[:] = remaining
        if ccny:
            cny[",".join(center1)] = ccny
        print("the remaning size of vals: %d" % len(vals))
    return cny

def kmeans(vc):
    """Compute the centroid of every cluster in ``vc``.

    Despite the name, this is a single centroid computation, not an
    iterative k-means loop.

    Args:
        vc: Mapping from cluster key to a list of members, each a string
            of the form ``"x,y"``.

    Returns:
        A list of ``[mean_x, mean_y]`` pairs, one per key of ``vc`` in the
        mapping's iteration order. Members that do not split into exactly
        two comma-separated fields (for example empty strings) are ignored
        and do not count toward the mean. A cluster with no usable members
        yields ``[0.0, 0.0]``.

    Raises:
        ValueError: If a two-field member contains a non-numeric field.
    """
    res = []
    for fh in vc:
        size = 0
        resultx = 0.0
        resulty = 0.0
        for dv in vc[fh]:
            dvs = dv.split(",")
            if len(dvs) != 2:
                # Malformed or empty entry: it must not inflate the divisor.
                continue
            size += 1
            resultx += float(dvs[0])
            resulty += float(dvs[1])
        if size == 0:
            # Avoid dividing by zero for an empty cluster.
            res.append([0.0, 0.0])
        else:
            res.append([resultx / size, resulty / size])
    return res


# Script entry: load the data set and print the canopies found with
# thresholds t1=0.38 and t2=0.6.
val = loaddata()
# Call form with a single argument prints the same text on Python 2 and 3.
print(canopy(val, 0.38, 0.6))

数据kmeans.txt：

0.3 0.25
0.9 0.6
0.5 0.9
0.2 0.15
0.3 1.0
0.85 0.55
0.95 0.4
0.2 0.3
0.12 0.1
0.1 0.9
0.8 0.5
0.13 0.3
0.95 0.75
0.4 0.9
0.25 0.8
0.8 0.65
0.2 0.85
0.1 0.3