本文实现 K 均值(K-means)聚类算法。
书上的例子是错的!对于那个数据集,如果采用书上选定的那几个初始点,算法从一开始就已经处于收敛状态。
实现时,顺便学了matplotlib.animation,可以把聚类过程用动态图显示出来。
代码如下:
# coding: utf-8
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import random
import itertools
def rand_RGB():
    """Return a random RGB triple.

    Returns:
        numpy.ndarray: array of shape (3,) holding three floats drawn
        uniformly from the half-open interval [0.0, 1.0).
    """
    return np.random.random(3)
def cal_dis(x, y):
    """Return the Euclidean (L2) distance between two points.

    Args:
        x: first point; a NumPy array or any array-like (list, tuple).
        y: second point, broadcast-compatible with ``x``.

    Returns:
        The L2 norm of ``x - y`` as a float.
    """
    # Coerce to arrays so plain lists/tuples work; ndarray inputs are
    # passed through unchanged by np.asarray.
    diff = np.asarray(x) - np.asarray(y)
    # A Manhattan (L1) distance would be np.abs(diff).sum() instead.
    return np.linalg.norm(diff)
def Kmeans(nb_cluster, points):
nb_sample = len(points)
init_points_id = random.choice(list(itertools.combinations(range(nb_sample), nb_cluster)))
centers = points[init_points_id, :]
colors = [rand_RGB() for i in range(nb_cluster)]
# colors = ['r', 'g', 'b']
imgs = []
max_iter = 100
for _ i