# -*- coding:gb2312 -*-
'''
Created on 2011-10-21
@author: chenjinandy
'''
from math import sqrt
def pearson(v1,v2):
    '''Return the Pearson distance (1 - r) between two numeric sequences.

    v1, v2 -- sequences of numbers; they are expected to have the same
              length (elements are paired by index over len(v1)).

    Returns a value close to 0 for perfectly correlated inputs and close to
    2 for perfectly anti-correlated ones. Returns 0 when the correlation is
    undefined: empty input, or either sequence has zero variance.
    '''
    count = len(v1)
    if count == 0:
        # Nothing to correlate; treat like the other degenerate cases
        # instead of dividing by zero.
        return 0
    # Force true division: under Python 2 an int length combined with int
    # samples would silently truncate the means.
    n = float(count)

    # Simple sums
    sum1 = sum(v1)
    sum2 = sum(v2)
    # Sums of the squares
    sum1Sq = sum(v * v for v in v1)
    sum2Sq = sum(v * v for v in v2)
    # Sum of the products
    pSum = sum(v1[i] * v2[i] for i in range(count))

    # Calculate r (Pearson score)
    num = pSum - sum1 * sum2 / n
    radicand = (sum1Sq - sum1 * sum1 / n) * (sum2Sq - sum2 * sum2 / n)
    if radicand <= 0:
        # Zero variance, or a tiny negative value produced by float
        # rounding, which would make sqrt() raise ValueError.
        return 0
    return 1.0 - num / sqrt(radicand)
# Sanity check: two identical vectors should print a Pearson distance of
# (about) 0.
v1 = [1.22, 1.33, 4.55, 6.45, 6.31, 4.12]
v2 = list(v1)
print(pearson(v1, v2))

# Sample 2-D points passed to cluster() at the end of the script.
dataset = [
    (1, 0), (0, 1), (1, 1), (2, 1), (1, 2), (2, 2), (3, 2),
    (6, 6), (7, 6), (8, 6), (6, 7), (7, 7), (8, 7), (9, 7),
    (7, 8), (8, 8), (9, 8), (8, 9), (9, 9),
]
print(dataset)
# k-means implemented in Python
def isequal(v1,v2):
    '''Return 1 if two points agree in their first two coordinates, else 0.

    Each coordinate is multiplied by 10 on both sides before the exact
    equality comparison. The second coordinate is only inspected when the
    first one matches.
    '''
    if v1[0] * 10 != v2[0] * 10:
        return 0
    if v1[1] * 10 != v2[1] * 10:
        return 0
    return 1
def order(dataset,k,centers=None):
    '''Assign every point to its closest centre using the pearson() distance.

    dataset -- sequence of points (each a sequence of numbers).
    k       -- number of clusters.
    centers -- optional sequence of k centre points. When omitted, the first
               k points of dataset are used as centres.

    Prints each point followed by its assigned cluster index and returns the
    list of indices, one per point. Ties go to the lowest cluster index.

    NOTE(review): for 2-element points the Pearson coefficient can only be
    +1, -1 or undefined, so this distance may be a poor fit for 2-D data --
    confirm the intended metric.
    '''
    if centers is None:
        centers = dataset[:k]
    labels = []
    for point in dataset:
        # Restart the search for every point so that the previous point's
        # winner cannot influence how ties are broken.
        best = 0
        best_dist = None
        for j in range(k):
            dist = pearson(point, centers[j])
            if best_dist is None or dist < best_dist:
                best = j
                best_dist = dist
        labels.append(best)
        print(point)
        print("被归为第 %s" % best + "类")
    return labels


def newcenter(dataset,num,type):
    '''Return the mean (x, y) of the points labelled num.

    dataset -- sequence of 2-D points.
    num     -- cluster index to average.
    type    -- sequence of cluster indices, parallel to dataset.

    Returns (0.0, 0.0) when no point carries the label num, instead of
    dividing by zero.
    '''
    sumx = 0.0
    sumy = 0.0
    cnt = 0
    for i, point in enumerate(dataset):
        if type[i] == num:
            sumx += point[0]
            sumy += point[1]
            cnt += 1
    if cnt == 0:
        return (0.0, 0.0)
    return (sumx / cnt, sumy / cnt)


def cluster(dataset,k,max_iter=100):
    '''Run k-means on dataset and print the resulting clusters.

    dataset  -- sequence of 2-D points; the first k points seed the centres.
    k        -- number of clusters.
    max_iter -- upper bound on assignment/update rounds, so the loop always
                terminates even if the centres keep oscillating.

    Each round assigns the points to the current centres with order(),
    recomputes the centres with newcenter(), and stops once no centre moved
    (as judged by isequal()). Afterwards the members of every cluster are
    printed, followed by every point of dataset. Returns None.
    '''
    center = [dataset[i] for i in range(k)]
    oldcenter = [(0.0, 0.0)] * k
    labels = []
    rounds = 0
    converged = (k == 0)
    while not converged and rounds < max_iter:
        rounds += 1
        # Assign against the centres computed in the previous round.
        labels = order(dataset, k, center)
        # Count unchanged centres afresh each round; a running total could
        # stop too early or step past k and never terminate.
        unchanged = 0
        for i in range(k):
            if i in labels:
                center[i] = newcenter(dataset, i, labels)
            # else: an empty cluster keeps its previous centre.
            if isequal(center[i], oldcenter[i]):
                unchanged += 1
            else:
                oldcenter[i] = center[i]
        converged = (unchanged == k)

    for i in range(k):
        print("属于类别 %s" % i + "的点有如下:")
        for point, label in zip(dataset, labels):
            if label == i:
                print(point)
    for point in dataset:
        print(point)
# Run the demo: split the sample points into three clusters.
cluster(dataset, k=3)