/**
*function K-means EuclideanDistance
*author aris_zzy E-mail: aris_zzy@126.com
*Date 2006-6-24
*Cluster No. 0,1,2,3,4
*/
import java.lang.Math;
import java.io.*;
import java.util.Arrays;
import java.text.DecimalFormat;
/**
 * K-means clustering on a row-major matrix using Euclidean distance, together with a small
 * demo ({@link #main}) that clusters random data and prints the result.
 *
 * <p>Cluster labels are zero-based: for {@code ClusterNum} clusters the labels returned by
 * {@link #KMeans} lie in {@code 0 .. ClusterNum - 1}.
 */
public class KMeansEuclideanDistance {

    /** Maximum number of assignment/update rounds performed by {@link #KMeans}. */
    private static final int MAX_ITERATIONS = 200;

    /** Number format shared by the print helpers (pattern {@code "00.00"}). */
    private static final DecimalFormat FORMAT = new DecimalFormat("00.00");

    /**
     * Computes the Euclidean distance between two vectors.
     *
     * @param array1 first vector
     * @param array2 second vector
     * @return square root of the sum of squared component differences
     * @throws IllegalArgumentException if the vectors differ in length
     */
    private static double EuDistance(double[] array1, double[] array2) {
        if (array1.length != array2.length) {
            // A mismatch used to be reported on stdout and answered with 0.0, which looks like
            // "identical vectors" to the caller; fail loudly instead.
            throw new IllegalArgumentException(
                    "vectors must have the same length: " + array1.length + " vs " + array2.length);
        }
        double sumOfSquares = 0.0;
        for (int i = 0; i < array1.length; i++) {
            double diff = array1[i] - array2[i];
            sumOfSquares += diff * diff;
        }
        return Math.sqrt(sumOfSquares);
    }

    /**
     * Prints a double array on one line as {@code [v0, v1, ...]}, each value rendered with
     * {@link #FORMAT}.
     *
     * @param array values to print
     */
    private static void printArray(double[] array) {
        System.out.print('[');
        for (int i = 0; i < array.length; i++) {
            System.out.print(FORMAT.format(array[i]));
            if ((i + 1) < array.length) {
                System.out.print(", ");
            }
        }
        System.out.println(']');
    }

    /**
     * Prints an int array on one line as {@code [v0, v1, ...]}.
     *
     * @param array values to print
     */
    private static void printArray(int[] array) {
        System.out.print('[');
        for (int i = 0; i < array.length; i++) {
            System.out.print(array[i]);
            if ((i + 1) < array.length) {
                System.out.print(", ");
            }
        }
        System.out.println(']');
    }

    /**
     * Prints the top-left {@code row} x {@code col} part of a matrix, one matrix row per output
     * line, framed by a "Matrix is:" header and curly braces.
     *
     * @param Matrix matrix to print
     * @param row number of rows to print
     * @param col number of columns to print
     */
    private static void printMatrix(double[][] Matrix, int row, int col) {
        System.out.println("Matrix is:");
        System.out.println('{');
        for (int i = 0; i < row; i++) {
            for (int j = 0; j < col; j++) {
                System.out.print(FORMAT.format(Matrix[i][j]));
                if ((j + 1) < col) {
                    System.out.print(", ");
                }
            }
            System.out.println();
        }
        System.out.println('}');
    }

    /**
     * Returns {@code M} distinct indices drawn at random from {@code 0 .. N - 1}, i.e. the first
     * {@code M} entries of a random permutation of that range.
     *
     * @param N size of the index range
     * @param M how many indices to draw
     * @return array of {@code M} distinct indices
     * @throws IllegalArgumentException if {@code N} or {@code M} is negative or {@code M > N}
     */
    private static int[] Randperm(int N, int M) {
        if (N < 0 || M < 0 || M > N) {
            throw new IllegalArgumentException(
                    "need 0 <= M <= N, got N=" + N + ", M=" + M);
        }
        int[] perm = new int[N];
        for (int i = 0; i < N; i++) {
            perm[i] = i;
        }
        // Partial Fisher-Yates shuffle: only the first M positions have to be randomised.
        for (int i = 0; i < M; i++) {
            // Math.random() is in [0, 1), so the offset is in 0 .. N - i - 1.
            int j = i + (int) (Math.random() * (N - i));
            int tmp = perm[i];
            perm[i] = perm[j];
            perm[j] = tmp;
        }
        return Arrays.copyOf(perm, M);
    }

    /**
     * Tells whether two int arrays have the same length and the same elements in the same order.
     *
     * @param Array1 first array
     * @param Array2 second array
     * @return {@code true} if the arrays are element-wise equal
     */
    private static boolean IsEqual(int[] Array1, int[] Array2) {
        return Arrays.equals(Array1, Array2);
    }

    /**
     * Returns the index of the smallest element; on ties the lowest index wins.
     *
     * @param Array non-empty array to scan
     * @return index of the minimum
     * @throws IllegalArgumentException if the array is empty
     */
    private static int MinLocation(double[] Array) {
        if (Array.length == 0) {
            throw new IllegalArgumentException("cannot locate the minimum of an empty array");
        }
        int location = 0;
        for (int i = 1; i < Array.length; i++) {
            if (Array[i] < Array[location]) {
                location = i;
            }
        }
        return location;
    }

    /**
     * Returns the index of the largest element; on ties the lowest index wins.
     *
     * @param Array non-empty array to scan
     * @return index of the maximum
     * @throws IllegalArgumentException if the array is empty
     */
    private static int MaxLocation(double[] Array) {
        if (Array.length == 0) {
            throw new IllegalArgumentException("cannot locate the maximum of an empty array");
        }
        int location = 0;
        for (int i = 1; i < Array.length; i++) {
            if (Array[i] > Array[location]) {
                location = i;
            }
        }
        return location;
    }

    /**
     * Clusters the first {@code row} rows of {@code Matrix} into {@code ClusterNum} groups.
     *
     * <p>The initial centres are {@code ClusterNum} distinct rows picked at random. Each round
     * assigns every row to its nearest centre (Euclidean distance, ties go to the lowest cluster
     * index) and then moves each centre to the mean of its rows. The loop stops as soon as a
     * round leaves all assignments unchanged, or after {@link #MAX_ITERATIONS} rounds.
     *
     * <p>A cluster that ends a round without any rows is re-seeded at the row that lies
     * farthest from its own centre, instead of dividing by a zero count.
     *
     * @param Matrix data, one observation per row; not modified
     * @param row number of rows of {@code Matrix} to cluster
     * @param col number of columns; each clustered row must have exactly this length
     * @param ClusterNum number of clusters, between 1 and {@code row}
     * @return array of length {@code row}; element {@code i} is the zero-based cluster of row
     *         {@code i}
     * @throws IllegalArgumentException if the arguments are inconsistent with each other
     */
    public static int[] KMeans(double[][] Matrix, int row, int col, int ClusterNum) {
        if (Matrix == null) {
            throw new IllegalArgumentException("Matrix must not be null");
        }
        if (row < 1 || row > Matrix.length) {
            throw new IllegalArgumentException(
                    "row must be in 1.." + Matrix.length + ", got " + row);
        }
        if (ClusterNum < 1 || ClusterNum > row) {
            throw new IllegalArgumentException(
                    "ClusterNum must be in 1.." + row + ", got " + ClusterNum);
        }
        for (int i = 0; i < row; i++) {
            if (Matrix[i] == null || Matrix[i].length != col) {
                throw new IllegalArgumentException(
                        "row " + i + " of Matrix does not have exactly " + col + " columns");
            }
        }

        // Seed the centres with randomly chosen, distinct data rows.
        int[] seedRows = Randperm(row, ClusterNum);
        double[][] centers = new double[ClusterNum][];
        for (int j = 0; j < ClusterNum; j++) {
            centers[j] = Arrays.copyOf(Matrix[seedRows[j]], col);
        }

        int[] labels = new int[row];
        // -1 is never a valid label, so the first round can never be mistaken for convergence.
        Arrays.fill(labels, -1);
        int[] previousLabels = new int[row];
        int[] clusterSizes = new int[ClusterNum];
        double[] centerDistances = new double[ClusterNum];
        double[] nearestDistance = new double[row];
        double[][] sums = new double[ClusterNum][col];

        for (int iter = 0; iter < MAX_ITERATIONS; iter++) {
            System.arraycopy(labels, 0, previousLabels, 0, row);

            // Assignment step: attach every row to its nearest centre.
            for (int i = 0; i < row; i++) {
                for (int j = 0; j < ClusterNum; j++) {
                    centerDistances[j] = EuDistance(Matrix[i], centers[j]);
                }
                labels[i] = MinLocation(centerDistances);
                nearestDistance[i] = centerDistances[labels[i]];
            }

            // Accumulate per-cluster member counts and coordinate sums in one pass.
            Arrays.fill(clusterSizes, 0);
            for (int j = 0; j < ClusterNum; j++) {
                Arrays.fill(sums[j], 0.0);
            }
            for (int i = 0; i < row; i++) {
                int cluster = labels[i];
                clusterSizes[cluster]++;
                for (int k = 0; k < col; k++) {
                    sums[cluster][k] += Matrix[i][k];
                }
            }

            // Update step: move each centre to the mean of its members.
            boolean reseeded = false;
            for (int j = 0; j < ClusterNum; j++) {
                if (clusterSizes[j] > 0) {
                    for (int k = 0; k < col; k++) {
                        centers[j][k] = sums[j][k] / clusterSizes[j];
                    }
                } else {
                    // Empty cluster: dividing by zero would turn the centre into NaN for good.
                    // Move it onto the row that is worst served by its current centre.
                    int farthest = MaxLocation(nearestDistance);
                    if (nearestDistance[farthest] > 0.0) {
                        centers[j] = Arrays.copyOf(Matrix[farthest], col);
                        // Negative marker keeps another empty cluster from taking the same row.
                        nearestDistance[farthest] = -1.0;
                        reseeded = true;
                    }
                    // Otherwise every row already coincides with a centre; keep the old centre.
                }
            }

            // Converged once no row changed cluster and no centre had to be re-seeded.
            if (!reseeded && IsEqual(labels, previousLabels)) {
                break;
            }
        }
        return labels;
    }

    /**
     * Demo: builds a 10 x 5 matrix of random values in [0, 10), prints it together with the
     * pairwise row-distance matrix, clusters the rows into 3 groups and prints the labels.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        int matrixRow = 10;
        int matrixCol = 5;
        int clusterNum = 3;

        double[][] matrix = new double[matrixRow][matrixCol];
        for (int i = 0; i < matrixRow; i++) {
            for (int j = 0; j < matrixCol; j++) {
                matrix[i][j] = 10 * Math.random();
            }
        }

        // Pairwise distances between rows, printed only as a visual sanity check.
        double[][] distMatrix = new double[matrixRow][matrixRow];
        for (int i = 0; i < matrixRow; i++) {
            for (int j = 0; j < matrixRow; j++) {
                distMatrix[i][j] = EuDistance(matrix[i], matrix[j]);
            }
        }

        printMatrix(matrix, matrixRow, matrixCol);
        System.out.println("The DistMatrix is:");
        printMatrix(distMatrix, matrixRow, matrixRow);

        int[] clusterOfRow = KMeans(matrix, matrixRow, matrixCol, clusterNum);
        System.out.println("The result of clustering, value of No.i means the ith belong to the No.value cluster");
        printArray(clusterOfRow);
    }
}
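// Note from the original author: the program has a bug; once you understand the idea behind
// k-means you will naturally know how to fix it. If you want to use the code as it is, then ...
// See also the MATLAB version (previous post).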