# 机器学习笔记-K-means算法

694人阅读 评论(0)

1、points：向量集合

2、kcentroids：初始k个类别的质心位置的集合

3、p：闵氏（Minkowski）距离的阶数参数，p=1 即曼哈顿（Manhattan）距离，p=2 即欧氏距离

def alg_kmeans(points, kcentroids, p):
    """Cluster ``points`` with K-means, starting from ``kcentroids``.

    Points are repeatedly assigned to clusters by ``PointstoCluster`` and
    the centroids recomputed by ``RecalculateCentroids`` until two
    consecutive assignments are identical.

    Args:
        points: Sequence of vectors (more than one).
        kcentroids: Initial centroid vectors (more than one); each must
            have the same length as ``points[0]``.
        p: Distance parameter, forwarded unchanged to ``PointstoCluster``.

    Returns:
        The final assignment as returned by ``PointstoCluster``: one list
        of point indexes per centroid.

    Raises:
        AssertionError: If fewer than two points or centroids are given, or
            if the first point and first centroid differ in length.
    """
    assert len(points) > 1
    assert len(kcentroids) > 1
    assert len(points[0]) == len(kcentroids[0])

    k = len(kcentroids)
    centroids = kcentroids
    # Start from k empty clusters so the first real assignment always
    # counts as a change.
    previous = [[] for _ in range(k)]

    while True:
        current = PointstoCluster(points, centroids, p)
        # Plain list equality replaces cmp(), which Python 3 removed.
        if current == previous:
            return current
        previous = current
        # RecalculateCentroids expects the cluster count as its second
        # argument, not the centroid list.
        centroids = RecalculateCentroids(points, k, current)

def PointstoCluster(points, kcentroids, p):
    """Assign every point to the cluster of its closest centroid.

    Args:
        points: Sequence of vectors (more than one).
        kcentroids: Centroid vectors (more than one); each must have the
            same length as ``points[0]``.
        p: Passed through as the third argument of ``dis_minkowski``.

    Returns:
        A list with one entry per centroid; entry ``j`` holds the indexes
        (into ``points``) of the points whose smallest distance was to
        ``kcentroids[j]``. Ties go to the lowest centroid index.

    Note:
        A point for which no distance is strictly below
        ``sys.float_info.max`` keeps the initial index ``-1`` and is
        therefore appended to the last cluster.
    """
    assert(len(points) > 1)
    assert(len(kcentroids) > 1)
    assert(len(points[0]) == len(kcentroids[0]))

    clusters = [[] for _ in kcentroids]

    for point_idx, point in enumerate(points):
        nearest = -1
        best = sys.float_info.max
        for centroid_idx, centroid in enumerate(kcentroids):
            distance = dis_minkowski(point, centroid, p)
            # Strict comparison: the first centroid at the minimum wins.
            if distance < best:
                nearest, best = centroid_idx, distance
        clusters[nearest].append(point_idx)

    return clusters

1、points：同上

2、k：类别数目

3、clusters：PointstoCluster的返回值，即每个类别所含点的下标列表

def RecalculateCentroids(points, k, clusters):
    """Compute the centroid (coordinate-wise mean) of each cluster.

    Args:
        points: Sequence of vectors (more than one).
        k: Number of clusters (greater than one).
        clusters: Sequence of at least ``k`` lists; ``clusters[i]`` holds
            the indexes into ``points`` of the members of cluster ``i``.

    Returns:
        A list of ``k`` centroid vectors. An empty cluster gets the
        all-zero vector.

    Raises:
        AssertionError: If fewer than two points are given or ``k <= 1``.
    """
    assert len(points) > 1
    assert k > 1

    dimension = len(points[0])
    centroids = [[0 for _ in range(dimension)] for _ in range(k)]
    for i in range(k):
        members = clusters[i]
        if not members:
            # Nothing to average; leave the zero vector in place.
            continue
        # Divide by a float so integer coordinates are not truncated by
        # Python 2 integer division.
        size = float(len(members))
        centroids[i] = [
            sum(points[pidx][d] for pidx in members) / size
            for d in range(dimension)
        ]
    return centroids

0
0

* 以上用户言论只代表其个人观点，不代表CSDN网站的观点或立场
个人资料
• 访问：16332次
• 积分：291
• 等级：
• 排名：千里之外
• 原创：14篇
• 转载：2篇
• 译文：0篇
• 评论：2条
评论排行
最新评论