算法描述
(1)任意选择k个数据对象作为初始聚类中心
(2)根据簇中对象的平均值,将每个对象赋给最类似的簇
(3)更新簇的平均值,即重新计算每个簇中所有对象的平均值
(4)计算聚类准则函数E
(5)重复步骤(2)-(4),直到准则函数E的值不再发生变化
代码
/**
 * A named cluster holding a medoid and the list of data points assigned to it.
 */
public class Cluster {
    /** Name of this cluster. */
    public String clusterName;

    /** Medoid of this cluster; remains {@code null} until {@link #setMedoid(Medoid)} is called. */
    private Medoid medoid;

    /** Data points currently stored in this cluster. */
    private final ArrayList<DataPoint> dataPoints = new ArrayList<DataPoint>();

    /**
     * Creates an empty cluster with no medoid.
     *
     * @param clusterName name to give this cluster
     */
    public Cluster(String clusterName) {
        this.clusterName = clusterName;
    }

    /**
     * Replaces the medoid of this cluster.
     *
     * @param c the new medoid
     */
    public void setMedoid(Medoid c) {
        this.medoid = c;
    }

    /**
     * @return the current medoid, or {@code null} if none has been set
     */
    public Medoid getMedoid() {
        return this.medoid;
    }

    /**
     * Assigns a data point to this cluster.
     *
     * @param dp the data point to add
     */
    public void addDataPoint(DataPoint dp) {
        // Tell the point which cluster owns it before storing it in the list.
        dp.setCluster(this);
        dataPoints.add(dp);
    }

    /**
     * Removes a data point from this cluster's list. Nothing is called on the point itself.
     *
     * @param dp the data point to remove
     */
    public void removeDataPoint(DataPoint dp) {
        dataPoints.remove(dp);
    }

    /**
     * @return how many data points this cluster currently holds
     */
    public int getNumDataPoints() {
        return dataPoints.size();
    }

    /**
     * @param pos index into this cluster's list of data points
     * @return the data point stored at {@code pos}
     */
    public DataPoint getDataPoint(int pos) {
        return dataPoints.get(pos);
    }

    /**
     * @return the name of this cluster
     */
    public String getName() {
        return clusterName;
    }

    /**
     * @return the internal list of data points itself (not a copy)
     */
    public ArrayList<DataPoint> getDataPoints() {
        return dataPoints;
    }
}
public class ClusterAnalysis {
// State of the analysis; clusters, miter, dataPoints and dimNum are all assigned by the constructor.
public Cluster[] clusters;// all clusters
private int miter;// number of iterations (NOTE(review): cap vs. running counter is not visible here — confirm)
private ArrayList<DataPoint> dataPoints = new ArrayList<DataPoint>();// all sample points
private int dimNum;// number of dimensions
public ClusterAnalysis(int k, int iter, ArrayList<DataPoint> dataPoints,int dimNum) {
clusters = new Cluster[k];// 类簇种类数
for (int i = 0; i < k; i++) {
clusters[i] = new Cluster(i+"");
}
this.miter = iter;
this.dataPoints = dataPoints;
this.dimNum=dimNum;
}
/**
 * @return the iteration count stored in this analysis
 */
public int getIterations() {
    return this.miter;
}
/**
 * Gathers the data-point list of every cluster, in cluster order.
 *
 * @return an array whose element {@code i} is the list returned by
 *         {@code clusters[i].getDataPoints()}
 */
public ArrayList<DataPoint>[] getClusterOutput() {
    // Java forbids creating an array of a parameterized type, so a raw ArrayList[] is allocated.
    @SuppressWarnings("unchecked")
    ArrayList<DataPoint>[] output = new ArrayList[clusters.length];
    int slot = 0;
    for (Cluster cluster : clusters) {
        output[slot] = cluster.getDataPoints();
        slot++;
    }
    return output;
}
public void startAnalysis(double[][] medoids) {
setInitialMedoids(medoids);
double[][] newMedoids=medoids;
double[][] oldMedoids=new double[medoids.length][this.dimNum];
while(!isEqual(oldMedoids,newMedoids)){