# 直接上代码
# https://github.com/kevinelstri/Hander-Marchine-Learning-Series/tree/main/K-means
import random
def main(k):
    """Run the whole k-means demo and return its result.

    Generates the sample data with ``input_data()``, draws ``k`` initial
    cluster centers from it with ``random_center``, and hands both to
    ``kmans``.

    Args:
        k: Number of clusters to form.

    Returns:
        Whatever ``kmans`` returns for the generated data.
    """
    # Load (generate) the data set.
    samples = input_data()
    # Choose the initial cluster centers.
    initial_centers = random_center(samples, k)
    # Run k-means; it is meant to stop once the centers no longer change.
    return kmans(samples, initial_centers, k)
def input_data(shape=(100, 2)):
    """Generate a random data set as a list of rows.

    Args:
        shape: Indexable pair ``(rows, cols)``; only ``shape[0]`` and
            ``shape[1]`` are read.

    Returns:
        A list of ``shape[0]`` lists, each holding ``shape[1]`` floats drawn
        with ``random.uniform(0.0, 1.0)``, filled row by row.
    """
    row_count = shape[0]
    col_count = shape[1]
    # Values are drawn in row-major order, one uniform() call per cell.
    return [
        [random.uniform(0.0, 1.0) for _ in range(col_count)]
        for _ in range(row_count)
    ]
def random_center(input_list, k):
    """Pick the initial cluster centers at random from the data.

    Args:
        input_list: Sequence of data points to choose from.
        k: Number of centers to pick.

    Returns:
        A new list with ``k`` elements taken from distinct positions of
        ``input_list``, as produced by ``random.sample``.
    """
    return random.sample(input_list, k)
def dist(a_list, b_list):
    """Return the squared Euclidean distance between two points.

    No square root is taken. The coordinates are walked over the length of
    ``a_list``: extra trailing entries in ``b_list`` are ignored, and a
    shorter ``b_list`` raises ``IndexError``.

    Args:
        a_list: Coordinates of the first point.
        b_list: Coordinates of the second point.

    Returns:
        The sum of the squared per-coordinate differences.
    """
    return sum(
        (a_value - b_list[pos]) ** 2 for pos, a_value in enumerate(a_list)
    )
def my_mean(in_list):
    """Return the coordinate-wise mean of a list of points.

    Args:
        in_list: Iterable of points (sequences of numbers).

    Returns:
        A list with one float per coordinate. Coordinates are paired with
        ``zip``, so the result is as long as the shortest point; an empty
        ``in_list`` yields ``[]``.
    """
    averages = []
    for column in zip(*in_list):
        averages.append(sum(column) / float(len(column)))
    return averages
def kmans(input_list, center_list, k, max_iter=None):
    """Cluster ``input_list`` into ``k`` groups with iterative k-means.

    Each pass assigns every point to the nearest current center (using
    ``dist``; ties go to the lowest cluster index) and then recomputes every
    center as the mean of its members (using ``my_mean``). Passes repeat
    until the recomputed centers equal the centers used for the assignment.

    The previous recursive version discarded the result of its recursive
    call, so it always returned the clusters of the very first pass; this
    version loops instead and returns the converged state.

    Args:
        input_list: Sequence of data points (sequences of numbers).
        center_list: Initial centers; at least ``k`` of them are required.
        k: Number of clusters.
        max_iter: Optional upper bound on the number of passes. ``None``
            (the default) means iterate until the centers stop changing.

    Returns:
        A tuple ``(result_list, end_center_list)``: ``result_list`` holds
        ``k`` lists with the points of each cluster, ``end_center_list``
        holds the ``k`` centers recomputed from those clusters.
    """
    current_centers = center_list
    passes_done = 0
    while True:
        # Assignment step: put every point into the bucket of its nearest center.
        result_list = [[] for _ in range(k)]
        for point in input_list:
            distances = [dist(point, current_centers[j]) for j in range(k)]
            result_list[distances.index(min(distances))].append(point)

        # Update step. An empty cluster has no mean (my_mean would return
        # [], which breaks dist on the next pass), so it keeps a copy of
        # its previous center instead.
        end_center_list = [
            my_mean(members) if members else list(current_centers[j])
            for j, members in enumerate(result_list)
        ]

        passes_done += 1
        converged = end_center_list == current_centers
        out_of_budget = max_iter is not None and passes_done >= max_iter
        if converged or out_of_budget:
            return result_list, end_center_list
        current_centers = end_center_list
if __name__ == "__main__":
    # Demo run: cluster the generated data into three groups and show the result.
    demo_result = main(k=3)
    print(demo_result)
# Source article: "面试手撕kmeans算法" (hand-coding the k-means algorithm for interviews)
# First published on 2022-04-19 20:35:50