题目链接:Deep-ML
K-Means聚类:给定 k 个初始簇中心(质心)和 n 个点,最多迭代 max_iterations 轮;每轮先把每个点分配给距离最近的簇中心,再把每个簇中心更新为该簇内所有点的均值。
import numpy as np
def euclidean_distance(a, b):
    """Compute the Euclidean distance from every row of ``a`` to the vector ``b``.

    :param a: points, array-like of shape (n_points, n_dims)
    :param b: centroid, array-like of shape (n_dims,)
    :return: 1-D array of length ``len(a)``; entry ``i`` is the distance
        between ``a[i]`` and ``b``
    """
    # Work in float so that squaring integer inputs cannot overflow.
    diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
    # The squares must be summed per point *before* the square root is taken;
    # taking the root element-wise first would yield the Manhattan distance.
    return np.sqrt((diff ** 2).sum(axis=1))
def k_means_clustering(points, k, initial_centroids, max_iterations):
    """Cluster ``points`` into ``k`` groups with the k-means algorithm.

    Each round assigns every point to its nearest centroid and then moves
    each centroid to the mean of the points assigned to it. Centroids are
    rounded to 4 decimals after every round.

    :param points: sequence of points, each a sequence of coordinates
    :param k: number of clusters
    :param initial_centroids: sequence of ``k`` starting centroids
    :param max_iterations: maximum number of assignment/update rounds
    :return: list of ``k`` tuples of floats, the final centroids
    :raises ValueError: if ``initial_centroids`` does not contain ``k`` entries
    """
    points = np.asarray(points, dtype=float)
    centroids = np.asarray(initial_centroids, dtype=float)
    if len(centroids) != k:
        raise ValueError(
            f"expected {k} initial centroids, got {len(centroids)}"
        )

    for _ in range(max_iterations):
        # Rows: one per centroid; columns: one per point.
        distances = np.array(
            [euclidean_distance(points, centroid) for centroid in centroids]
        )
        # 1-D array giving, for each point, the index of its nearest centroid.
        cluster = np.argmin(distances, axis=0)

        updated = []
        for i in range(k):
            members = points[cluster == i]
            # The mean of an empty selection is NaN, which would poison all
            # later rounds; a centroid with no members stays where it is.
            updated.append(members.mean(axis=0) if len(members) else centroids[i])
        new_centroids = np.round(np.array(updated), 4)

        # Assignments depend only on the centroids, so once they stop moving
        # every further round would reproduce the same result.
        if np.array_equal(new_centroids, centroids):
            break
        centroids = new_centroids

    # tolist() converts NumPy scalars to plain Python floats.
    return [tuple(centroid.tolist()) for centroid in centroids]
if __name__ == '__main__':
    # Demo run: six 2-D points forming two groups, at x=1 and x=10.
    demo_points = [(1, 2), (1, 4), (1, 0), (10, 2), (10, 4), (10, 0)]
    start_centroids = [(1, 1), (10, 1)]
    final_centroids = k_means_clustering(
        points=demo_points,
        k=2,
        initial_centroids=start_centroids,
        max_iterations=10,
    )
    print(final_centroids)