7.1 K-Means
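''' K-Means
1. Set the centers:
   a. Randomly draw one sample as the initial center; m, n = X.shape.
   b. Allocate the distance matrix [m, n_clusters-1] and the center matrix [n_clusters, n].
   c. Compute the distance from every sample to the cluster centers, take each sample's
      minimum distance to a center (np.min(distances)) and sum these minima.
   d. Draw a random value r from the uniform distribution on [0, sum(nds)) and determine
      which interval r falls into; the corresponding sample is chosen as the next center.
2. Train
3. Predict
4. Test
'''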
import numpy as np
class keans():
    """K-Means clustering with k-means++ seeding and several restarts.

    Parameters
    ----------
    k_clusters : int
        Number of clusters to form.
    tol : float, default 1e-4
        A run stops once every center moved by less than ``tol``
        (Euclidean distance) during one update step.
    max_iter : int, default 300
        Upper bound on assign/update iterations within one run.
    n_init : int, default 10
        Number of independent runs; the run with the smallest SSE wins.

    Attributes
    ----------
    centers_ : numpy.ndarray, shape (k_clusters, n_features)
        Centers of the best run. Set by :meth:`predict`.
    sse_ : float
        Sum of squared distances of the best run. Set by :meth:`predict`.
    """

    def __init__(self, k_clusters, tol=1e-4, max_iter=300, n_init=10):
        self.k_clusters = k_clusters
        self.tol = tol
        self.max_iter = max_iter
        self.n_init = n_init

    def _init_centers_kpp(self, X, n_clusters):
        """Pick ``n_clusters`` initial centers from the rows of ``X`` (k-means++).

        The first center is a uniformly random row. Every further center is
        a row drawn with probability proportional to its squared distance to
        the nearest center chosen so far. Randomness comes from the global
        ``np.random`` state.

        Returns an array of shape (n_clusters, n_features).
        """
        m, n = X.shape
        centers = np.empty((n_clusters, n))
        centers[0] = X[np.random.randint(m)]

        # Squared distance of each sample to its nearest chosen center.
        # Keeping a running minimum guarantees that *every* chosen center,
        # including the most recent one, is taken into account.
        closest_sq = np.full(m, np.inf)
        for i in range(1, n_clusters):
            newest_sq = np.sum((X - centers[i - 1]) ** 2, axis=1)
            np.minimum(closest_sq, newest_sq, out=closest_sq)

            # Roulette-wheel selection: find the first sample whose
            # cumulative weight exceeds a uniform draw in [0, total).
            cumulative = np.cumsum(closest_sq)
            threshold = cumulative[-1] * np.random.random()
            j = int(np.searchsorted(cumulative, threshold, side="right"))
            # All-zero weights (or rounding at the upper end) fall back to
            # the last sample.
            centers[i] = X[min(j, m - 1)]
        return centers

    def _kmeans(self, X):
        """Run one k-means pass from a fresh k-means++ initialisation.

        Returns
        -------
        tuple or None
            ``(labels, centers, sse)`` on success, or ``None`` when some
            cluster lost all of its members so the caller can restart.
            ``sse`` is the sum of squared distances computed during the
            last assignment step.
        """
        m, n = X.shape
        k = self.k_clusters
        centers = self._init_centers_kpp(X, k)
        centers_old = np.empty_like(centers)
        distances = np.zeros((m, k))
        labels = np.zeros(m, dtype=int)

        for _ in range(self.max_iter):
            # Assignment step: squared distance of every sample to each center.
            for i in range(k):
                distances[:, i] = np.sum((X - centers[i]) ** 2, axis=1)
            labels = np.argmin(distances, axis=1)

            # Update step: move each center to the mean of its members.
            np.copyto(centers_old, centers)
            for i in range(k):
                members = X[labels == i]
                if members.shape[0] == 0:
                    # A mean over zero rows would be NaN; signal a restart.
                    return None
                centers[i] = np.mean(members, axis=0)

            shift = np.sqrt(np.sum((centers - centers_old) ** 2, axis=1))
            if np.all(shift < self.tol):
                break

        sse = np.sum(distances[np.arange(m), labels])
        return labels, centers, sse

    def predict(self, X):
        """Cluster ``X`` and return the label of every sample.

        Runs :meth:`_kmeans` ``n_init`` times (restarting any run that ends
        with an empty cluster) and keeps the run with the smallest SSE. The
        winning centers and SSE are stored in ``centers_`` and ``sse_``.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray, shape (n_samples,)
            Integer cluster index of each sample.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, if ``k_clusters`` or ``n_init`` is below 1,
            or if ``X`` has fewer distinct rows than ``k_clusters`` (every
            run would then end with an empty cluster and never finish).
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                "X must be a 2-D array of shape (n_samples, n_features)")
        if self.k_clusters < 1 or self.n_init < 1:
            raise ValueError("k_clusters and n_init must both be at least 1")
        if (X.shape[0] < self.k_clusters
                or np.unique(X, axis=0).shape[0] < self.k_clusters):
            raise ValueError(
                "X must contain at least k_clusters distinct samples")

        best = None
        for _ in range(self.n_init):
            run = None
            while run is None:
                run = self._kmeans(X)
            # Strict '<' keeps the earliest run on ties.
            if best is None or run[2] < best[2]:
                best = run

        labels, self.centers_, self.sse_ = best
        return labels
def circle(r=5, m=100):
    """Generate a toy 2-D data set made of three groups of ``m`` points.

    For each group index ``i`` in 0, 1, 2 the first coordinate is
    ``a = randn(m) * 0.3 + i`` and the second coordinate is
    ``sqrt(r**2 - a) * 0.3 + i``. Randomness comes from the global
    ``np.random`` state.

    NOTE(review): the radicand is ``r**2 - a`` rather than ``r**2 - a**2``,
    so the points do not lie on a circle of radius ``r``. The formula is
    kept as written; confirm the intent. Any ``a`` larger than ``r**2``
    produces NaN.

    Parameters
    ----------
    r : number, default 5
        Value squared inside the square root.
    m : int, default 100
        Number of points per group.

    Returns
    -------
    numpy.ndarray, shape (3 * m, 2)
        Rows are ordered group by group (group 0 first).
    """
    groups = []
    for shift in range(3):
        a = np.random.randn(m) * 0.3 + shift
        b = np.sqrt(r ** 2 - a) * 0.3 + shift
        groups.append(np.column_stack((a, b)))
    return np.concatenate(groups, axis=0)
if __name__ == '__main__':
    # Demo: build the synthetic data set and cluster part of it.
    X = circle(r=5, m=100)
    kean = keans(k_clusters=3, tol=1e-4, max_iter=100, n_init=60)
    # Only the first 10 rows are clustered; circle() orders its rows group by
    # group, so these all come from the first group.
    labels = kean.predict(X[:10])
    print(labels)
'''
[1 1 1 1 1 1 1 1 1 1]
Process finished with exit code 0
'''