class Kmeans:
    """Plain-Python k-means clustering (Lloyd's algorithm).

    Points are sequences of numbers, all of the same length.

    Attributes:
        k_node: Current centroids, one list of coordinates per cluster.
        k_dict: Mapping of cluster index to the points assigned to that
            cluster in the most recent assignment pass.
        k_dict_last: The mapping produced by the pass before ``k_dict``.
    """

    def __init__(self):
        """Create an estimator with no centroids and no assignments."""
        from collections import defaultdict

        self.k_dict_last = defaultdict(list)
        self.k_dict = defaultdict(list)
        self.k_node = []

    @staticmethod
    def eclidDist(vec1, vec2):
        """Return the Euclidean distance between two equal-length vectors.

        Raises:
            ValueError: If the vectors have different lengths.
        """
        from math import sqrt

        if len(vec1) != len(vec2):
            raise ValueError("vectors must have the same length")
        return sqrt(sum((a - b) ** 2 for a, b in zip(vec1, vec2)))

    @staticmethod
    def avg(mat):
        """Return the component-wise mean of the vectors in *mat*.

        An empty *mat* yields an empty list.
        """
        count = len(mat)
        if count == 0:
            return []
        # zip(*mat) walks the data column by column (one column per dimension).
        return [sum(column) / count for column in zip(*mat)]

    @staticmethod
    def random_k(n, k):
        """Return *k* distinct random integers drawn from ``range(n)``.

        Raises:
            ValueError: If *k* is negative or larger than *n*.
        """
        import random

        if k < 0 or k > n:
            raise ValueError("k must satisfy 0 <= k <= n")
        # sample() draws without replacement, so exactly k unique values
        # come back (a retry-free replacement for "skip duplicates").
        return random.sample(range(n), k)

    def _nearest(self, point):
        """Return the index of the centroid in ``k_node`` closest to *point*."""
        best_index = 0
        best_dist = None
        for index, centroid in enumerate(self.k_node):
            dist = self.eclidDist(point, centroid)
            # Strict '<' keeps the lowest index on ties.
            if best_dist is None or dist < best_dist:
                best_dist = dist
                best_index = index
        return best_index

    def kmeans(self, dataset, k, max_iter=300):
        """Cluster *dataset* into *k* groups.

        Centroids are seeded with *k* distinct points picked at random from
        *dataset*. Each pass assigns every point to its nearest centroid and
        then moves each centroid to the mean of its points; iteration stops
        when an assignment pass matches the previous one or after
        *max_iter* passes.

        Args:
            dataset: Sequence of equal-length numeric vectors.
            k: Number of clusters, ``1 <= k <= len(dataset)``.
            max_iter: Upper bound on the number of assignment passes.

        Returns:
            ``self.k_dict``: cluster index -> list of assigned points. A
            cluster that received no points has no entry.

        Raises:
            ValueError: If *k* is outside ``1..len(dataset)``.
        """
        from collections import defaultdict

        size = len(dataset)
        if k < 1 or k > size:
            raise ValueError("k must satisfy 1 <= k <= len(dataset)")

        self.k_node = [list(dataset[i]) for i in self.random_k(size, k)]
        self.k_dict_last = defaultdict(list)
        self.k_dict = defaultdict(list)

        previous_labels = None
        for _ in range(max_iter):
            labels = [self._nearest(point) for point in dataset]

            # Build a fresh mapping every pass so points never accumulate
            # across iterations and k_dict_last stays a distinct object.
            clusters = defaultdict(list)
            for point, label in zip(dataset, labels):
                clusters[label].append(point)
            self.k_dict_last = self.k_dict
            self.k_dict = clusters

            if labels == previous_labels:
                break
            previous_labels = labels

            # Clusters that attracted no points keep their old centroid.
            for index, members in clusters.items():
                self.k_node[index] = self.avg(members)

        return self.k_dict