```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets.samples_generator import make_blobs
from scipy.spatial.distance import cdist
# Demo dataset: 100 samples scattered around 6 blob centres; the fixed
# random_state keeps the generated points identical from run to run.
x, y = make_blobs(
    n_samples=100,
    centers=6,
    cluster_std=0.6,
    random_state=1234,
)
class K_Means(object):
    """Minimal k-means clusterer built on NumPy and ``scipy``'s ``cdist``.

    Attributes:
        n_clusters: Number of centroids to sample when none were supplied.
        max_iter: Upper bound on the number of assign/update rounds in ``fit``.
        centroids: Float array of centroid coordinates, one row per cluster.
            It is an empty array of shape ``(0,)`` until centroids are
            supplied or ``fit`` has sampled them from the data.
    """

    def __init__(self, n_clusters=6, max_iter=100, centroids=None):
        """Store the configuration and an optional set of initial centroids.

        Args:
            n_clusters: How many centroids ``fit`` samples from the data when
                ``centroids`` is empty.
            max_iter: Maximum number of refinement iterations.
            centroids: Optional array-like of initial centroids, one row per
                cluster. ``None`` or an empty sequence means "pick them
                randomly from the data in ``fit``".
        """
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        # ``None`` replaces the former mutable ``[]`` default; both end up as
        # an empty array, so callers that pass ``[]`` explicitly still work.
        if centroids is None:
            centroids = []
        # The builtin ``float`` is used because the ``np.float`` alias was
        # removed from NumPy; the resulting dtype (float64) is the same.
        self.centroids = np.array(centroids, dtype=float)

    def fit(self, data):
        """Refine the centroids on ``data`` using Lloyd's iterations.

        Each round assigns every sample to its nearest centroid and then
        moves each centroid that received samples to the mean of those
        samples. Centroids that received no samples are left where they are.
        Iteration stops after ``max_iter`` rounds, or earlier once a round
        leaves every centroid unchanged (further rounds could not change the
        result, so the outcome equals running all ``max_iter`` rounds).

        Args:
            data: Array-like of samples, one row per sample.
        """
        # Work in floating point so integer input does not truncate the means.
        data = np.asarray(data, dtype=float)

        if self.centroids.size == 0:
            n_samples = data.shape[0]
            # Prefer distinct rows so two centroids do not start on the same
            # point; fall back to sampling with replacement only when there
            # are fewer samples than requested clusters.
            allow_repeats = n_samples < self.n_clusters
            picked = np.random.choice(
                n_samples, self.n_clusters, replace=allow_repeats
            )
            self.centroids = data[picked]

        for _ in range(self.max_iter):
            labels = np.argmin(cdist(data, self.centroids), axis=1)
            updated = self.centroids.copy()
            # Iterate over the labels that actually occur, so every supplied
            # centroid is updated even if their count differs from n_clusters.
            for cluster in np.unique(labels):
                updated[cluster] = data[labels == cluster].mean(axis=0)
            if np.array_equal(updated, self.centroids):
                break
            self.centroids = updated

    def predict(self, samples):
        """Return the index of the nearest centroid for each row of ``samples``.

        Args:
            samples: Array-like of samples, one row per sample.

        Returns:
            A 1-D integer array with one centroid index per sample.

        Raises:
            ValueError: If no centroids are available yet.
        """
        if self.centroids.size == 0:
            raise ValueError(
                "no centroids available: call fit() or pass centroids first"
            )
        distances = cdist(samples, self.centroids)
        return np.argmin(distances, axis=1)
def plotKMeans(x, y, centroids, subplot, title):
    """Scatter the samples and the centroids on one subplot, then show it.

    Args:
        x: Sample array; columns 0 and 1 are used as the plot coordinates.
        y: Accepted for call compatibility; not used by this function.
        centroids: Centroid array; columns 0 and 1 are used as coordinates.
        subplot: Subplot specifier forwarded to ``plt.subplot`` (e.g. 121).
        title: Title text for the subplot.
    """
    centroids = np.asarray(centroids)
    plt.subplot(subplot)
    plt.scatter(x[:, 0], x[:, 1], c='r')
    # One colour value per centroid. This used to be hard-coded to six
    # values, which only worked when exactly six centroids were passed.
    centroid_colours = np.arange(len(centroids))
    plt.scatter(centroids[:, 0], centroids[:, 1], c=centroid_colours, s=100)
    plt.title(title)
    plt.show()
# Start from six hand-picked centroids stacked on the vertical line x = 2
# (second coordinate running from 1 to 6).
kmeans = K_Means(
    max_iter=100,
    centroids=np.array([[2, row] for row in range(1, 7)]),
)

# Plot the samples together with the centroids as they are before fitting.
plt.figure(figsize=(16, 6))
plotKMeans(x, y, kmeans.centroids, 121, 'initial State')

# Run the clustering, then plot the samples with the fitted centroids.
kmeans.fit(x)
plt.figure(figsize=(16, 6))
plotKMeans(x, y, kmeans.centroids, 122, 'Final State')

# Assign two new points to their nearest centroid and print the indices.
x_new = np.array([[0, 0], [10, 7]])
y_pred = kmeans.predict(x_new)
print(y_pred)