# -*- coding: utf-8 -*-
"""
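K-means and k-medoids clustering demo.

Run as a script, this file loads the matrix stored under "X" in
kmeans_data.mat, clusters its rows and shows a scatter plot of the first
two columns coloured by cluster.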
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
import random
import math
# Shared cluster table: cluster index -> list of row indices of X assigned to
# it.  The clustering functions clear and refill it on every iteration.
maps = dict()

# Scatter-plot colour used for each cluster, looked up by position.
clr = [
    'blue',
    'green',
    'yellow',
    'red',
    'black',
    'cyan',
    'pink',
]
def calcCetroid(X, sub):
    """Return the centroid (column-wise mean) of selected rows of X.

    Input:
        X   -- data point features, n-by-p array
        sub -- row indices of X that form the cluster (array or list of ints)
    Output:
        length-p float array holding the mean of the selected rows
    Raises:
        ValueError if sub is empty; the mean would otherwise be a silent
        0/0 division that produces a NaN centroid.
    """
    sub = np.asarray(sub)
    if sub.size == 0:
        raise ValueError("cannot compute the centroid of an empty selection")
    # Fancy-index the member rows once and let numpy average them.
    return X[sub, :].mean(axis=0)
def k_medoids(X, k, iterNums=100):
    """Cluster the rows of X with k-medoids and plot the result.

    Each round assigns every point to its nearest medoid (Euclidean
    distance), then replaces each medoid by the cluster member closest to
    the cluster mean.

    Input:
        X        -- data point features, n-by-p array; the plot uses the
                    first two columns
        k        -- the number of clusters
        iterNums -- maximum number of assign/update rounds (the loop was
                    previously hard-coded to 100)
    Output:
        idx       -- cluster label of every point, length-n integer array,
                     from the last assignment step that was run
        ctrx      -- medoids, k-by-p matrix (each row is a row of X)
        iter_ctrs -- medoids after each round, iterNums-by-k-by-p array
    Side effects:
        Refills the module-level ``maps`` (cluster index -> member row
        indices) and shows a matplotlib scatter plot.
    Raises:
        ValueError if k or iterNums is smaller than 1, or if k exceeds the
        number of rows (raised by random.sample).
    """
    if k < 1 or iterNums < 1:
        raise ValueError("k and iterNums must both be at least 1")
    n, p = X.shape
    # Seed the medoids with k distinct, randomly chosen data points.
    seeds = random.sample(range(n), k)
    ctrx = np.array(X[seeds, :], dtype=float)
    iter_ctrs = np.zeros((iterNums, k, p))
    distance = np.zeros((k, n))
    idx = np.zeros(n, dtype=int)
    for it in range(iterNums):
        # Assignment step: distance from every medoid to every point.
        for j in range(k):
            distance[j] = np.sqrt(np.sum(np.power(X - ctrx[j], 2), axis=1))
        idx = np.argmin(distance, axis=0)
        maps.clear()
        for point, label in enumerate(idx):
            maps.setdefault(int(label), []).append(point)
        # Update step.  Rows are addressed by cluster key (not by dict
        # iteration order), and a cluster that lost all its points keeps its
        # previous medoid instead of collapsing to the origin.
        new_ctrx = ctrx.copy()
        for j, members in maps.items():
            mean = calcCetroid(X, np.array(members))
            spread = np.sqrt(np.sum(np.power(X[members, :] - mean, 2), axis=1))
            new_ctrx[j] = X[members[int(np.argmin(spread))], :]
        iter_ctrs[it] = new_ctrx
        converged = np.array_equal(new_ctrx, ctrx)
        ctrx = new_ctrx
        if converged:
            # Further rounds would reproduce exactly the same medoids.
            iter_ctrs[it:] = ctrx
            break
    for j in sorted(maps):
        members = maps[j]
        # Cycle through the palette so k > len(clr) does not raise.
        plt.scatter(X[members, 0], X[members, 1], c=clr[j % len(clr)])
    plt.show()
    return idx, ctrx, iter_ctrs
def k_means(X, k, iterNums=100):
    """Cluster the rows of X with k-means and plot the result.

    Each round assigns every point to its nearest center (Euclidean
    distance), then moves each center to the mean of its members.

    Input:
        X        -- data point features, n-by-p matrix; the plot uses the
                    first two columns
        k        -- the number of clusters
        iterNums -- maximum number of assign/update rounds
    Output:
        idx       -- cluster label of every point, length-n integer array,
                     from the last assignment step that was run
        ctrx      -- cluster centers, k-by-p matrix
        iter_ctrs -- cluster centers after each round,
                     iterNums-by-k-by-p 3D matrix
    Side effects:
        Refills the module-level ``maps`` (cluster index -> member row
        indices) and shows a matplotlib scatter plot.
    Raises:
        ValueError if k or iterNums is smaller than 1, or if k exceeds the
        number of rows (raised by random.sample).
    """
    if k < 1 or iterNums < 1:
        raise ValueError("k and iterNums must both be at least 1")
    n, p = X.shape
    # Seed the centers with k distinct, randomly chosen data points.
    seeds = random.sample(range(n), k)
    ctrx = np.array(X[seeds, :], dtype=float)
    iter_ctrs = np.zeros((iterNums, k, p))
    distance = np.zeros((k, n))
    idx = np.zeros(n, dtype=int)
    for it in range(iterNums):
        # Assignment step: distance from every center to every point.
        for j in range(k):
            distance[j] = np.sqrt(np.sum(np.power(X - ctrx[j], 2), axis=1))
        idx = np.argmin(distance, axis=0)
        maps.clear()
        for point, label in enumerate(idx):
            maps.setdefault(int(label), []).append(point)
        # Update step.  Rows are addressed by cluster key (not by dict
        # iteration order), and a cluster that lost all its points keeps its
        # previous center instead of collapsing to the origin.
        new_ctrx = ctrx.copy()
        for j, members in maps.items():
            new_ctrx[j] = calcCetroid(X, np.array(members))
        iter_ctrs[it] = new_ctrx
        converged = np.array_equal(new_ctrx, ctrx)
        ctrx = new_ctrx
        if converged:
            # Further rounds would reproduce exactly the same centers.
            iter_ctrs[it:] = ctrx
            break
    for j in sorted(maps):
        members = maps[j]
        # Cycle through the palette so k > len(clr) does not raise.
        plt.scatter(X[members, 0], X[members, 1], c=clr[j % len(clr)])
    plt.show()
    return idx, ctrx, iter_ctrs
if __name__ == "__main__":
    # Number of clusters to look for.
    k = 5
    # The data matrix is read from the "X" entry of the .mat file.
    dataSet = sio.loadmat("kmeans_data.mat")
    X = dataSet["X"]
    # Swap the call below for k_means(X, k) to run plain k-means instead.
    k_medoids(X, k)