import numpy as np
def ecludDist(x, y):
    """Return the Euclidean distance between the points ``x`` and ``y``.

    Both arguments are converted to NumPy arrays and subtracted element-wise.
    The squared differences are added with the built-in ``sum`` and the square
    root of that total is returned.
    """
    delta = np.array(x) - np.array(y)
    squared = np.square(delta)
    return np.sqrt(sum(squared))
def clusterMean(dataset):
    """Return the mean point of a cluster.

    ``dataset`` is converted to a NumPy array, its entries are added together
    with the built-in ``sum`` and the total is divided by ``len(dataset)``.
    """
    points = np.array(dataset)
    total = sum(points)
    count = len(dataset)
    return total / count
def randCenter(dataset, k):
    """Pick ``k`` distinct points of ``dataset`` to use as initial centers.

    Args:
        dataset: Indexable sequence of points.
        k: Number of centers to draw.

    Returns:
        A NumPy array holding the ``k`` selected points.

    Raises:
        ValueError: If ``k`` is larger than the number of points, because
            that many distinct points cannot be drawn.
    """
    n_points = len(dataset)
    if k > n_points:
        raise ValueError(
            f"cannot pick {k} distinct centers from {n_points} points"
        )
    # A random permutation of all indices makes every point (including the
    # last one) eligible and guarantees distinct picks without a retry loop.
    chosen = np.random.permutation(n_points)[:max(k, 0)]
    return np.array([dataset[i] for i in chosen])
def kMeans(dataset, dist, center, k):
    """Cluster ``dataset`` into ``k`` groups and print every intermediate step.

    Each pass assigns every point to the center with the smallest ``dist``
    value, recomputes each center with ``clusterMean`` and prints the groups.
    Passes repeat until the centers no longer change, at which point the final
    centers are printed.

    Args:
        dataset: Iterable of points.
        dist: Callable ``dist(point, center)`` returning a comparable distance.
        center: Initial centers, one per cluster.
        k: Number of clusters.

    Returns:
        A tuple ``(centers, clusters)`` with the final centers as a NumPy
        array and the list of ``k`` point lists from the last pass.
    """
    center = np.array(center)
    # Iterate instead of recursing so slow convergence cannot hit Python's
    # recursion limit.
    while True:
        # all_kinds holds the points assigned to each cluster in this pass.
        all_kinds = [[] for _ in range(k)]

        # Assign every point to its nearest center (first one wins on ties).
        for point in dataset:
            distances = [dist(point, c) for c in center]
            all_kinds[distances.index(min(distances))].append(point)

        # Update the centers. A cluster that received no points keeps its
        # previous center, because the mean of zero points is undefined.
        center_ = np.array([
            clusterMean(members) if members else center[idx]
            for idx, members in enumerate(all_kinds)
        ])

        # Print the intermediate result of this pass.
        for idx in range(k):
            print('第' + str(idx) + '组:', all_kinds[idx], end='\n')
        print("------------------------------------------------------------------------------------")

        if np.array_equal(center_, center):
            break
        center = center_

    for idx in range(k):
        print('第'+str(idx)+'组均值点:', center_[idx], end='\n')
    return center_, all_kinds
def main(k):
    """Generate 50 random 2-D points and run k-means on them with ``k`` clusters."""
    # Draw all x coordinates first, then all y coordinates.
    x = []
    for _ in range(50):
        x.append(np.random.randint(0, 50))
    y = []
    for _ in range(50):
        y.append(np.random.randint(0, 50))

    points = [list(pair) for pair in zip(x, y)]
    kMeans(
        dataset=points,
        dist=ecludDist,
        center=randCenter(dataset=points, k=k),
        k=k,
    )
# Script entry point: run the demo with three clusters.
if __name__ == "__main__":
    main(3)
# Note: