1. 基本 K-means 算法
选择K个点作为初始质心
repeat
将每个点指派到最近的质心,形成K个簇
重新计算每个簇的质心
until 簇不发生变化或达到最大迭代次数
时间复杂度:O(tKmn),其中 t 为迭代次数,K 为簇的数目,m 为记录数,n 为维数。
空间复杂度:O((m+K)n),其中 K 为簇的数目,m 为记录数,n 为维数。
2. Java 实现
package com.ping;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
public class kmeans {
// Number of clusters; the constructor creates this many empty cluster lists.
private int K;
// Record count as passed to the constructor (presumably tuples.size() -- TODO confirm).
private int dataNum;
// Number of dimensions per record; getDis reads indices 1..dimNum of each tuple.
private int dimNum;
// Input records. Index 0 of each array holds the record number, indices 1..dimNum the values.
private ArrayList<double[]> tuples;
// One list of records per cluster; holds K empty lists right after construction.
private ArrayList<ArrayList<double[]>> cluster;
// Starts empty; presumably holds one centroid per cluster -- confirm against the rest of the class.
private ArrayList<double[]> means;
public kmeans(int K, int dataNum, int dimNum, ArrayList<double[]> tuples) {
this.K = K;
this.dataNum = dataNum;
this.dimNum = dimNum;
this.tuples = tuples;
cluster = new ArrayList<ArrayList<double[]>>();
for (int i = 0; i < K; ++i) {
cluster.add(new ArrayList<double[]>());
}
means = new ArrayList<double[]>();
}
// 获取 tupleA 和 tupleB的距离 , 下标0存放记录编号,下标1到dimNum存放实际元素
private double getDis(double tupleA[], double tupleB[]) {
double dis = 0;
for (int k = 1; k <= dimNum; ++k) {
dis += (tupleA[k] - tupleB[k]) * (tupleA[k] - tupleB[k]);