# coding=utf-8
import os
import sys
from pylab import *
import random
import types
# Module-level store of canopies. canopy() adds one entry per non-empty
# canopy: the key is the center's comma-joined coordinates, the value is the
# list of points that fell into that canopy.
cny = {}
def genRandom():
    """Draw a random 3x2 array, print it, and return it.

    The array is produced by ``rand``, which this file gets through
    ``from pylab import *`` (presumably ``numpy.random.rand``, i.e. uniform
    samples from [0, 1) -- confirm against the installed pylab).

    Returns:
        The 3x2 array that was printed.
    """
    f = rand(3, 2)
    # A single-argument print call emits the same text whether ``print`` is
    # the Python 2 statement or the Python 3 function.
    print("random number: %s" % (f,))
    return f
def loaddata(path="D:/kmeans.txt"):
    """Read tab-separated points from a text file.

    Reading stops at the first blank (whitespace-only) line or at end of
    file, whichever comes first; lines after a blank line are not read.

    Args:
        path: File to read. Defaults to the location the script has always
            used.

    Returns:
        A list with one entry per line read; each entry is the list of
        tab-separated fields of that line, kept as strings.
    """
    vals = []
    # ``with`` guarantees the handle is closed even if a line fails to parse.
    with open(path, "r") as f:
        for line in f:
            if not line.strip():
                break
            vals.append(line.strip("\n").split("\t"))
    return vals
def euclid(list1, list2):
    """Return the Euclidean distance between two coordinate sequences.

    Every coordinate is passed through ``float`` first, so numeric strings
    are accepted. Only the first ``len(list1)`` positions are compared;
    ``list2`` must be at least that long.
    """
    squared_diffs = [
        (float(coord) - float(list2[pos])) ** 2
        for pos, coord in enumerate(list1)
    ]
    return sqrt(sum(squared_diffs, 0.0))
'''
Group the points around the randomly chosen centers to form the initial clusters.
'''
def classfy(rn, val):
    """Assign every point in ``val`` to its nearest center in ``rn``.

    Distances are measured with ``euclid``; when two centers are equally
    close the one that appears first in ``rn`` wins.

    Args:
        rn: Sequence of centers, each a sequence of coordinates.
        val: Iterable of points. Each point is a sequence of coordinate
            strings (it is comma-joined for the output).

    Returns:
        ``(rn, vc)``. ``vc`` maps the 1-based position of each center in
        ``rn`` to the list of comma-joined coordinate strings of the points
        assigned to it. Every list ends with one empty string, which is the
        established shape of this structure; consumers must skip it.

    Raises:
        ValueError: If ``val`` contains points but ``rn`` has no centers.
    """
    # Cluster ids are 1-based to keep the keys callers already rely on.
    members = dict((pos + 1, []) for pos in range(len(rn)))
    for point in val:
        nearest_id = None
        nearest_dist = None
        for pos, center in enumerate(rn):
            dist = euclid(center, point)
            # Strict '<' keeps the first center on ties.
            if nearest_dist is None or dist < nearest_dist:
                nearest_dist = dist
                nearest_id = pos + 1
        if nearest_id is None:
            raise ValueError("classfy needs at least one center")
        members[nearest_id].append(",".join(point))
    vc = {}
    for cluster_id in members:
        # Trailing "" reproduces the terminator element of the
        # "p1:p2:...:" encoding this structure has always had.
        vc[cluster_id] = members[cluster_id] + [""]
    return rn, vc
'''
Canopy thresholds used by the example run: t1 = 0.38, t2 = 0.6
'''
def canopy(vals, t1, t2):
    """Partition ``vals`` into canopies around randomly chosen centers.

    Each round picks a random remaining point as the center and measures
    every other remaining point against it with ``euclid``:

    * distance <= ``t1``: the point joins the canopy and is retired, so it
      can neither become a center nor join a later canopy;
    * ``t1`` < distance <= ``t2``: the point joins the canopy but stays
      available for later rounds.

    Each canopy is stored in the module-level ``cny`` dict under the
    center's comma-joined coordinates. A center that attracts no points is
    not recorded. ``cny`` is not cleared first, so entries from earlier
    calls remain in it.

    Args:
        vals: List of points, each a sequence of coordinate strings. The
            list is copied, so the caller's list is left untouched.
        t1: Tight threshold (the example run uses 0.38).
        t2: Loose threshold (the example run uses 0.6); meant to be at least
            ``t1``.

    Returns:
        The module-level ``cny`` dict. An empty ``vals`` returns it
        unchanged.
    """
    remaining = list(vals)
    # Loop instead of recursing once per center, so large inputs cannot hit
    # the interpreter's recursion limit.
    while remaining:
        center = remaining.pop(random.randint(0, len(remaining) - 1))
        members = []
        survivors = []
        # Build the next round's list separately; removing from the list
        # being iterated would skip the element after each removal.
        for point in remaining:
            dist = euclid(point, center)
            if dist <= t1:
                members.append(point)
            else:
                survivors.append(point)
                if dist <= t2:
                    members.append(point)
        remaining = survivors
        if members:
            cny[",".join(center)] = members
        # Single-argument print call: same output on Python 2 and 3.
        print("the remaning size of vals: %d" % len(remaining))
    return cny
def kmeans(vc):
    """Return the centroid of every cluster in ``vc``.

    Args:
        vc: Mapping from cluster id to a list of ``"x,y"`` strings. Entries
            that do not split into exactly two comma-separated fields (for
            example an empty string) are ignored.

    Returns:
        A list of ``[mean_x, mean_y]`` pairs, one per key of ``vc`` in the
        mapping's iteration order. A cluster without any usable entry yields
        ``[0.0, 0.0]``.

    Raises:
        ValueError: If a two-field entry holds a non-numeric field.
    """
    res = []
    for fh in vc:
        xs = []
        ys = []
        for dv in vc[fh]:
            dvs = dv.split(",")
            if len(dvs) == 2:
                xs.append(float(dvs[0]))
                ys.append(float(dvs[1]))
        # Divide by the number of points actually summed; counting skipped
        # entries would drag the centroid toward the origin.
        size = len(xs)
        if size:
            res.append([sum(xs, 0.0) / size, sum(ys, 0.0) / size])
        else:
            res.append([0.0, 0.0])
    return res
# Example run: load the points and print the canopies found with
# t1 = 0.38 and t2 = 0.6.
val = loaddata()
# Single-argument print call: same output on Python 2 and 3.
print(canopy(val, 0.38, 0.6))
import os
import sys
from pylab import *
import random
import types
# Module-level store of canopies. canopy() adds one entry per non-empty
# canopy: the key is the center's comma-joined coordinates, the value is the
# list of points that fell into that canopy.
cny = {}
def genRandom():
    """Draw a random 3x2 array, print it, and return it.

    The array is produced by ``rand``, which this file gets through
    ``from pylab import *`` (presumably ``numpy.random.rand``, i.e. uniform
    samples from [0, 1) -- confirm against the installed pylab).

    Returns:
        The 3x2 array that was printed.
    """
    f = rand(3, 2)
    # A single-argument print call emits the same text whether ``print`` is
    # the Python 2 statement or the Python 3 function.
    print("random number: %s" % (f,))
    return f
def loaddata(path="D:/kmeans.txt"):
    """Read tab-separated points from a text file.

    Reading stops at the first blank (whitespace-only) line or at end of
    file, whichever comes first; lines after a blank line are not read.

    Args:
        path: File to read. Defaults to the location the script has always
            used.

    Returns:
        A list with one entry per line read; each entry is the list of
        tab-separated fields of that line, kept as strings.
    """
    vals = []
    # ``with`` guarantees the handle is closed even if a line fails to parse.
    with open(path, "r") as f:
        for line in f:
            if not line.strip():
                break
            vals.append(line.strip("\n").split("\t"))
    return vals
def euclid(list1, list2):
    """Return the Euclidean distance between two coordinate sequences.

    Every coordinate is passed through ``float`` first, so numeric strings
    are accepted. Only the first ``len(list1)`` positions are compared;
    ``list2`` must be at least that long.
    """
    squared_diffs = [
        (float(coord) - float(list2[pos])) ** 2
        for pos, coord in enumerate(list1)
    ]
    return sqrt(sum(squared_diffs, 0.0))
'''
Group the points around the randomly chosen centers to form the initial clusters.
'''
def classfy(rn, val):
    """Assign every point in ``val`` to its nearest center in ``rn``.

    Distances are measured with ``euclid``; when two centers are equally
    close the one that appears first in ``rn`` wins.

    Args:
        rn: Sequence of centers, each a sequence of coordinates.
        val: Iterable of points. Each point is a sequence of coordinate
            strings (it is comma-joined for the output).

    Returns:
        ``(rn, vc)``. ``vc`` maps the 1-based position of each center in
        ``rn`` to the list of comma-joined coordinate strings of the points
        assigned to it. Every list ends with one empty string, which is the
        established shape of this structure; consumers must skip it.

    Raises:
        ValueError: If ``val`` contains points but ``rn`` has no centers.
    """
    # Cluster ids are 1-based to keep the keys callers already rely on.
    members = dict((pos + 1, []) for pos in range(len(rn)))
    for point in val:
        nearest_id = None
        nearest_dist = None
        for pos, center in enumerate(rn):
            dist = euclid(center, point)
            # Strict '<' keeps the first center on ties.
            if nearest_dist is None or dist < nearest_dist:
                nearest_dist = dist
                nearest_id = pos + 1
        if nearest_id is None:
            raise ValueError("classfy needs at least one center")
        members[nearest_id].append(",".join(point))
    vc = {}
    for cluster_id in members:
        # Trailing "" reproduces the terminator element of the
        # "p1:p2:...:" encoding this structure has always had.
        vc[cluster_id] = members[cluster_id] + [""]
    return rn, vc
'''
Canopy thresholds used by the example run: t1 = 0.38, t2 = 0.6
'''
def canopy(vals, t1, t2):
    """Partition ``vals`` into canopies around randomly chosen centers.

    Each round picks a random remaining point as the center and measures
    every other remaining point against it with ``euclid``:

    * distance <= ``t1``: the point joins the canopy and is retired, so it
      can neither become a center nor join a later canopy;
    * ``t1`` < distance <= ``t2``: the point joins the canopy but stays
      available for later rounds.

    Each canopy is stored in the module-level ``cny`` dict under the
    center's comma-joined coordinates. A center that attracts no points is
    not recorded. ``cny`` is not cleared first, so entries from earlier
    calls remain in it.

    Args:
        vals: List of points, each a sequence of coordinate strings. The
            list is copied, so the caller's list is left untouched.
        t1: Tight threshold (the example run uses 0.38).
        t2: Loose threshold (the example run uses 0.6); meant to be at least
            ``t1``.

    Returns:
        The module-level ``cny`` dict. An empty ``vals`` returns it
        unchanged.
    """
    remaining = list(vals)
    # Loop instead of recursing once per center, so large inputs cannot hit
    # the interpreter's recursion limit.
    while remaining:
        center = remaining.pop(random.randint(0, len(remaining) - 1))
        members = []
        survivors = []
        # Build the next round's list separately; removing from the list
        # being iterated would skip the element after each removal.
        for point in remaining:
            dist = euclid(point, center)
            if dist <= t1:
                members.append(point)
            else:
                survivors.append(point)
                if dist <= t2:
                    members.append(point)
        remaining = survivors
        if members:
            cny[",".join(center)] = members
        # Single-argument print call: same output on Python 2 and 3.
        print("the remaning size of vals: %d" % len(remaining))
    return cny
def kmeans(vc):
    """Return the centroid of every cluster in ``vc``.

    Args:
        vc: Mapping from cluster id to a list of ``"x,y"`` strings. Entries
            that do not split into exactly two comma-separated fields (for
            example an empty string) are ignored.

    Returns:
        A list of ``[mean_x, mean_y]`` pairs, one per key of ``vc`` in the
        mapping's iteration order. A cluster without any usable entry yields
        ``[0.0, 0.0]``.

    Raises:
        ValueError: If a two-field entry holds a non-numeric field.
    """
    res = []
    for fh in vc:
        xs = []
        ys = []
        for dv in vc[fh]:
            dvs = dv.split(",")
            if len(dvs) == 2:
                xs.append(float(dvs[0]))
                ys.append(float(dvs[1]))
        # Divide by the number of points actually summed; counting skipped
        # entries would drag the centroid toward the origin.
        size = len(xs)
        if size:
            res.append([sum(xs, 0.0) / size, sum(ys, 0.0) / size])
        else:
            res.append([0.0, 0.0])
    return res
# Example run: load the points and print the canopies found with
# t1 = 0.38 and t2 = 0.6.
val = loaddata()
# Single-argument print call: same output on Python 2 and 3.
print(canopy(val, 0.38, 0.6))
Data file kmeans.txt:
0.3 0.25
0.9 0.6
0.5 0.9
0.2 0.15
0.3 1.0
0.85 0.55
0.95 0.4
0.2 0.3
0.12 0.1
0.1 0.9
0.8 0.5
0.13 0.3
0.95 0.75
0.4 0.9
0.25 0.8
0.8 0.65
0.2 0.85
0.1 0.3