ClusterMain.java
package eu.eodigos.kmean;
import java.util.Iterator;
import java.util.List;
import java.util.Vector;
import eu.eodigos.hibernate.bean.ClusterInput;
import eu.eodigos.hibernate.server.AccessDBServer;
import eu.eodigos.hibernate.server.AccessDBServerImp;
/**
 * Entry point for the k-means clustering of users.
 *
 * <p>{@link #main(String[])} runs {@link #clusterByDatabase()}. {@link #test()} runs the
 * same analysis on a small hard-coded data set and prints the resulting clusters.
 *
 * @author daoger
 * @version 1.0
 */
public class ClusterMain
{
    /** Number of clusters used by {@link #test()}. */
    private static final int TEST_CLUSTER_COUNT = 3;

    /** Number of clusters used by {@link #clusterByDatabase()}. */
    private static final int DATABASE_CLUSTER_COUNT = 7;

    /**
     * Upper bound on reassignment passes handed to {@link ClusterAssistant}; a larger
     * value allows more refinement passes.
     */
    private static final int MAX_ITERATIONS = 10000;

    /**
     * Clusters the users stored in the database.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        ClusterMain clusterMain = new ClusterMain();
        clusterMain.clusterByDatabase();
    }

    /**
     * Clusters fourteen hand-written data points into three clusters and prints each
     * cluster with its members to standard output.
     */
    public void test()
    {
        Vector<DataPoint> dataPoints = new Vector<DataPoint>();
        dataPoints.add(new DataPoint(0.12, 0.21, 0.26, 0.45, 0.67, 0.23, 0.11, Integer.valueOf(1)));
        dataPoints.add(new DataPoint(0.22, 0.23, 0.46, 0.11, 0.63, 0.11, 0.12, Integer.valueOf(2)));
        dataPoints.add(new DataPoint(0.32, 0.34, 0.78, 0.17, 0.68, 0.67, 0.13, Integer.valueOf(3)));
        dataPoints.add(new DataPoint(0.42, 0.45, 0.26, 0.42, 0.48, 0.39, 0.14, Integer.valueOf(4)));
        dataPoints.add(new DataPoint(0.52, 0.29, 0.65, 0.59, 0.16, 0.74, 0.15, Integer.valueOf(5)));
        dataPoints.add(new DataPoint(0.62, 0.25, 0.48, 0.61, 0.27, 0.16, 0.67, Integer.valueOf(6)));
        dataPoints.add(new DataPoint(0.72, 0.35, 0.39, 0.20, 0.65, 0.26, 0.17, Integer.valueOf(7)));
        dataPoints.add(new DataPoint(0.82, 0.20, 0.16, 0.29, 0.32, 0.61, 0.18, Integer.valueOf(8)));
        dataPoints.add(new DataPoint(0.92, 0.71, 0.26, 0.37, 0.17, 0.81, 0.19, Integer.valueOf(9)));
        dataPoints.add(new DataPoint(0.13, 0.39, 0.17, 0.41, 0.47, 0.37, 0.10, Integer.valueOf(10)));
        dataPoints.add(new DataPoint(0.14, 0.23, 0.47, 0.93, 0.68, 0.28, 0.29, Integer.valueOf(11)));
        dataPoints.add(new DataPoint(0.15, 0.57, 0.84, 0.19, 0.15, 0.39, 0.39, Integer.valueOf(12)));
        dataPoints.add(new DataPoint(0.16, 0.19, 0.45, 0.38, 0.36, 0.82, 0.49, Integer.valueOf(13)));
        dataPoints.add(new DataPoint(0.17, 0.89, 0.29, 0.39, 0.82, 0.58, 0.59, Integer.valueOf(14)));

        // Divide the sample users into 3 clusters.
        ClusterAssistant clusterAssistant = new ClusterAssistant(TEST_CLUSTER_COUNT, MAX_ITERATIONS, dataPoints);
        clusterAssistant.startAnalysis();

        Vector[] v = clusterAssistant.getClusterOutput();
        for (int i = 0; i < v.length; i++)
        {
            System.out.println("-----------Cluster" + i + "---------");
            for (Object member : v[i])
            {
                DataPoint dpTemp = (DataPoint) member;
                // Every feature is separated by a comma, including avg2 and avg3.
                String dps = "userid_" + dpTemp.getUserid() + "[" + dpTemp.getAvg1() + "," + dpTemp.getAvg2()
                        + "," + dpTemp.getAvg3() + "," + dpTemp.getAvg4() + "," + dpTemp.getAvg5() + ","
                        + dpTemp.getAvg6() + "," + dpTemp.getAvg7() + "]";
                System.out.println(dps);
            }
        }
    }

    /**
     * Reads every {@code ClusterInput} row through {@code AccessDBServer}, clusters the
     * rows into seven clusters and writes the 1-based cluster number of each data point
     * back through {@code updateClusterCateOfUsers}.
     *
     * <p>Null rows are skipped. When no usable row is found the method returns without
     * touching the database, because the analysis needs at least one data point.
     */
    public void clusterByDatabase()
    {
        AccessDBServer access = new AccessDBServerImp();
        Vector<DataPoint> dataPoints = new Vector<DataPoint>();
        List clusterList = access.getAllClusterInputData();
        for (Object row : clusterList)
        {
            ClusterInput clusterInput = (ClusterInput) row;
            if (clusterInput != null)
            {
                // NOTE(review): getClusterId() is stored as the DataPoint's user id and is
                // later handed to updateClusterCateOfUsers - confirm it identifies the user.
                dataPoints.add(new DataPoint(clusterInput.getAvgArch(), clusterInput.getAvgMon(),
                        clusterInput.getAvgMus(), clusterInput.getAvgBuil(), clusterInput.getAvgChap(),
                        clusterInput.getAvgBeach(), clusterInput.getAvgWalk(), clusterInput.getClusterId()));
            }
        }
        if (dataPoints.isEmpty())
        {
            // Nothing to cluster; ClusterAssistant reads the first data point unconditionally.
            return;
        }

        // Divide all users into 7 clusters.
        ClusterAssistant clusterAssistant = new ClusterAssistant(DATABASE_CLUSTER_COUNT, MAX_ITERATIONS,
                dataPoints);
        clusterAssistant.startAnalysis();

        Vector[] v = clusterAssistant.getClusterOutput();
        for (int i = 0; i < v.length; i++)
        {
            for (Object member : v[i])
            {
                DataPoint dpTemp = (DataPoint) member;
                // Cluster categories are stored 1-based.
                access.updateClusterCateOfUsers(dpTemp.getUserid(), Integer.valueOf(i + 1));
            }
        }
    }
}
ClusterAssistant.java
package eu.eodigos.kmean;
import java.util.Vector;
/**
 * Partitions seven-feature {@link DataPoint}s into a fixed number of {@link Cluster}s by
 * repeatedly moving each point to the cluster whose centroid is nearest (k-means).
 *
 * <p>Usage: construct, call {@link #startAnalysis()}, then read the result with
 * {@link #getClusterOutput()}.
 *
 * @author daoger
 * @version 1.0
 */
public class ClusterAssistant
{
    /** Number of features (avg1..avg7) carried by every data point. */
    private static final int FEATURE_COUNT = 7;

    private Cluster[] clusters;
    private int miter;
    private Vector<DataPoint> mDataPoints = new Vector<DataPoint>();
    private double mSWCSS;

    /**
     * Creates the assistant and its {@code k} empty clusters, named "Cluster0",
     * "Cluster1", and so on.
     *
     * @param k number of clusters to build
     * @param iter maximum number of reassignment passes run by {@link #startAnalysis()}
     * @param dataPoints the {@link DataPoint}s to cluster; the vector is used as-is (not
     *            copied); {@code null} is treated as an empty data set
     */
    public ClusterAssistant(int k, int iter, Vector dataPoints)
    {
        clusters = new Cluster[k];
        for (int i = 0; i < k; i++)
        {
            clusters[i] = new Cluster("Cluster" + i);
        }
        this.miter = iter;
        if (dataPoints != null)
        {
            // The public signature stays raw for existing callers; elements are read as
            // DataPoint everywhere in this class.
            @SuppressWarnings("unchecked")
            Vector<DataPoint> typed = dataPoints;
            this.mDataPoints = typed;
        }
    }

    /** Stores the total of {@link Cluster#getSumSqr()} over all clusters in {@code mSWCSS}. */
    private void calcSWCSS()
    {
        double total = 0;
        for (int i = 0; i < clusters.length; i++)
        {
            total = total + clusters[i].getSumSqr();
        }
        mSWCSS = total;
    }

    /**
     * Runs the clustering.
     *
     * <p>Steps: spread the initial centroids between the per-feature minimum and maximum,
     * deal the data points to the clusters round-robin, recompute the centroids, then make
     * up to {@code iter} passes in which every point that is closer to another cluster's
     * centroid than to its own is moved there. A pass that moves nothing ends the loop
     * early, because every later pass would see the same state and move nothing either.
     *
     * <p>Does nothing when there are no clusters or no data points.
     */
    public void startAnalysis()
    {
        if (clusters.length == 0 || mDataPoints.isEmpty())
        {
            // No clusters would spin forever in the dealing loop; no points would fail
            // when the initial centroids read the first data point.
            return;
        }
        setInitialCentroids();

        // Deal the points to the clusters in round-robin order.
        for (int n = 0; n < mDataPoints.size(); n++)
        {
            clusters[n % clusters.length].addDataPoint(mDataPoints.elementAt(n));
        }
        recalcCentroids();
        calcSWCSS();

        for (int pass = 0; pass < miter; pass++)
        {
            boolean moved = false;
            for (int j = 0; j < clusters.length; j++)
            {
                Cluster source = clusters[j];
                int k = 0;
                while (k < source.getNumDataPoints())
                {
                    DataPoint point = source.getDataPoint(k);
                    Cluster target = findCloserCluster(point, source);
                    if (target == null)
                    {
                        k++;
                        continue;
                    }
                    target.addDataPoint(point);
                    source.removeDataPoint(point);
                    recalcCentroids();
                    calcSWCSS();
                    moved = true;
                    // k is not advanced: the removal shifted the next point into slot k,
                    // and advancing would skip it for this pass.
                }
            }
            if (!moved)
            {
                break;
            }
        }
    }

    /**
     * Finds the cluster, other than {@code source}, whose centroid is strictly closer to
     * {@code point} than the point's current distance to its own centroid.
     *
     * @return the closest such cluster, or {@code null} when the point should stay
     */
    private Cluster findCloserCluster(DataPoint point, Cluster source)
    {
        double best = point.getCurrentEuDt();
        Cluster winner = null;
        for (int l = 0; l < clusters.length; l++)
        {
            Cluster candidate = clusters[l];
            if (candidate == source)
            {
                continue;
            }
            double distance = point.testEuclideanDistance(candidate.getCentroid());
            if (best > distance)
            {
                best = distance;
                winner = candidate;
            }
        }
        return winner;
    }

    /**
     * Recomputes the centroid of every non-empty cluster. An empty cluster keeps its
     * current centroid: averaging zero points would divide by zero and leave NaN
     * coordinates that no point can ever be closer to.
     */
    private void recalcCentroids()
    {
        for (int i = 0; i < clusters.length; i++)
        {
            if (clusters[i].getNumDataPoints() > 0)
            {
                clusters[i].getCentroid().calcCentroid();
            }
        }
    }

    /**
     * Returns the members of each cluster.
     *
     * @return one vector of {@link DataPoint}s per cluster, in cluster order; the vectors
     *         are the clusters' own member lists, not copies
     */
    public Vector[] getClusterOutput()
    {
        Vector v[] = new Vector[clusters.length];
        for (int i = 0; i < clusters.length; i++)
        {
            v[i] = clusters[i].getDataPoints();
        }
        return v;
    }

    /**
     * Gives cluster n (n = 1..k) the centroid ((max - min) / (k + 1)) * n + min in every
     * feature, where min and max are taken over all data points.
     */
    private void setInitialCentroids()
    {
        // Min and step depend only on the data, so compute them once per feature.
        double[] min = new double[FEATURE_COUNT];
        double[] step = new double[FEATURE_COUNT];
        for (int f = 0; f < FEATURE_COUNT; f++)
        {
            min[f] = getMinXValue(f + 1);
            step[f] = (getMaxXValue(f + 1) - min[f]) / (clusters.length + 1);
        }
        for (int n = 1; n <= clusters.length; n++)
        {
            double[] c = new double[FEATURE_COUNT];
            for (int f = 0; f < FEATURE_COUNT; f++)
            {
                c[f] = (step[f] * n) + min[f];
            }
            Centroid ce = new Centroid(c[0], c[1], c[2], c[3], c[4], c[5], c[6]);
            clusters[n - 1].setCentroid(ce);
            ce.setCluster(clusters[n - 1]);
        }
    }

    /**
     * Reads one feature of a data point.
     *
     * @param avgnumber feature number, 1 to 7
     * @return the feature value, or 0.0 for any other feature number
     */
    private static double featureValue(DataPoint dp, int avgnumber)
    {
        switch (avgnumber)
        {
        case 1:// Archeological
            return dp.getAvg1();
        case 2:// Monuments
            return dp.getAvg2();
        case 3:// Museums
            return dp.getAvg3();
        case 4:// Buildings
            return dp.getAvg4();
        case 5:// Chapels
            return dp.getAvg5();
        case 6:// Beaches
            return dp.getAvg6();
        case 7:// Walking
            return dp.getAvg7();
        default:
            return 0.0;
        }
    }

    /** Largest value of feature {@code avgnumber} (1 to 7) over all data points. */
    private double getMaxXValue(int avgnumber)
    {
        double max = featureValue(mDataPoints.elementAt(0), avgnumber);
        for (int i = 1; i < mDataPoints.size(); i++)
        {
            double value = featureValue(mDataPoints.elementAt(i), avgnumber);
            if (value > max)
            {
                max = value;
            }
        }
        return max;
    }

    /** Smallest value of feature {@code avgnumber} (1 to 7) over all data points. */
    private double getMinXValue(int avgnumber)
    {
        double min = featureValue(mDataPoints.elementAt(0), avgnumber);
        for (int i = 1; i < mDataPoints.size(); i++)
        {
            double value = featureValue(mDataPoints.elementAt(i), avgnumber);
            if (value < min)
            {
                min = value;
            }
        }
        return min;
    }

    /** @return the number of clusters */
    public int getKValue()
    {
        return clusters.length;
    }

    /** @return the maximum number of reassignment passes given to the constructor */
    public int getIterations()
    {
        return miter;
    }

    /** @return the number of data points being clustered */
    public int getTotalDataPoints()
    {
        return mDataPoints.size();
    }

    /** @return the total of the clusters' {@code getSumSqr()} values as last computed */
    public double getSWCSS()
    {
        return mSWCSS;
    }

    /**
     * @param pos zero-based cluster index
     * @return the cluster at that index
     */
    public Cluster getCluster(int pos)
    {
        return clusters[pos];
    }
}
Centroid.java
package eu.eodigos.kmean;
/**
 * The centre of a {@link Cluster}: one coordinate per data-point feature (avg1..avg7).
 *
 * @author daoger
 * @version 1.0
 */
class Centroid
{
    private double avgC1, avgC2, avgC3, avgC4, avgC5, avgC6, avgC7;
    private Cluster mCluster;

    /**
     * Creates a centroid at the given coordinates. The owning cluster must be attached
     * with {@link #setCluster(Cluster)} before {@link #calcCentroid()} is called.
     */
    public Centroid(double ac1, double ac2, double ac3, double ac4, double ac5, double ac6, double ac7)
    {
        this.avgC1 = ac1;
        this.avgC2 = ac2;
        this.avgC3 = ac3;
        this.avgC4 = ac4;
        this.avgC5 = ac5;
        this.avgC6 = ac6;
        this.avgC7 = ac7;
    }

    /**
     * Moves the centroid to the mean of the owning cluster's data points, refreshes each
     * point's distance to the new centroid, and refreshes the cluster's sum of squares.
     *
     * <p>When the cluster is empty the coordinates are left where they are: dividing the
     * zero sums by a count of zero would set every coordinate to NaN.
     */
    public void calcCentroid()
    {
        int numDP = mCluster.getNumDataPoints();
        if (numDP > 0)
        {
            double sum1 = 0, sum2 = 0, sum3 = 0, sum4 = 0, sum5 = 0, sum6 = 0, sum7 = 0;
            // Per-feature totals over the cluster's members.
            for (int i = 0; i < numDP; i++)
            {
                sum1 = sum1 + mCluster.getDataPoint(i).getAvg1();
                sum2 = sum2 + mCluster.getDataPoint(i).getAvg2();
                sum3 = sum3 + mCluster.getDataPoint(i).getAvg3();
                sum4 = sum4 + mCluster.getDataPoint(i).getAvg4();
                sum5 = sum5 + mCluster.getDataPoint(i).getAvg5();
                sum6 = sum6 + mCluster.getDataPoint(i).getAvg6();
                sum7 = sum7 + mCluster.getDataPoint(i).getAvg7();
            }
            this.avgC1 = sum1 / numDP;
            this.avgC2 = sum2 / numDP;
            this.avgC3 = sum3 / numDP;
            this.avgC4 = sum4 / numDP;
            this.avgC5 = sum5 / numDP;
            this.avgC6 = sum6 / numDP;
            this.avgC7 = sum7 / numDP;
            // The centroid moved, so every member's stored distance is out of date.
            for (int i = 0; i < numDP; i++)
            {
                mCluster.getDataPoint(i).calcEuclideanDistance();
            }
        }
        // Refresh the cluster's sum of squares from the members' distances.
        mCluster.calcSumOfSquares();
    }

    /** Attaches the cluster whose members {@link #calcCentroid()} averages. */
    public void setCluster(Cluster c)
    {
        this.mCluster = c;
    }

    public double getAvgC1()
    {
        return avgC1;
    }

    public void setAvgC1(double avgC1)
    {
        this.avgC1 = avgC1;
    }

    public double getAvgC2()
    {
        return avgC2;
    }

    public void setAvgC2(double avgC2)
    {
        this.avgC2 = avgC2;
    }

    public double getAvgC3()
    {
        return avgC3;
    }

    public void setAvgC3(double avgC3)
    {
        this.avgC3 = avgC3;
    }

    public double getAvgC4()
    {
        return avgC4;
    }

    public void setAvgC4(double avgC4)
    {
        this.avgC4 = avgC4;
    }

    public double getAvgC5()
    {
        return avgC5;
    }

    public void setAvgC5(double avgC5)
    {
        this.avgC5 = avgC5;
    }

    public double getAvgC6()
    {
        return avgC6;
    }

    public void setAvgC6(double avgC6)
    {
        this.avgC6 = avgC6;
    }

    public double getAvgC7()
    {
        return avgC7;
    }

    public void setAvgC7(double avgC7)
    {
        this.avgC7 = avgC7;
    }

    /** @return the attached cluster, or {@code null} if none has been set */
    public Cluster getCluster()
    {
        return mCluster;
    }
}
Cluster.java
package eu.eodigos.kmean;
import java.util.Vector;
/**
 * A named group of {@link DataPoint}s together with its {@link Centroid} and the sum of
 * the members' squared distances to that centroid.
 *
 * @author daoger
 * @version 1.0
 */
class Cluster
{
    private String mName;
    private Centroid mCentroid;
    private double mSumSqr;
    private Vector<DataPoint> mDataPoints;

    /**
     * Creates an empty cluster without a centroid.
     *
     * @param name label returned by {@link #getName()}
     */
    public Cluster(String name)
    {
        this.mName = name;
        this.mCentroid = null; // will be set by calling setCentroid()
        mDataPoints = new Vector<DataPoint>();
    }

    public void setCentroid(Centroid c)
    {
        mCentroid = c;
    }

    public Centroid getCentroid()
    {
        return mCentroid;
    }

    /**
     * Adds a data point to this cluster and refreshes the sum of squares.
     *
     * <p>The centroid must already be set: {@link DataPoint#setCluster(Cluster)} computes
     * the point's distance to this cluster's centroid straight away.
     */
    public void addDataPoint(DataPoint dp)
    {
        dp.setCluster(this);
        this.mDataPoints.addElement(dp);
        calcSumOfSquares();
    }

    /** Removes the first occurrence of the data point and refreshes the sum of squares. */
    public void removeDataPoint(DataPoint dp)
    {
        this.mDataPoints.removeElement(dp);
        calcSumOfSquares();
    }

    public int getNumDataPoints()
    {
        return this.mDataPoints.size();
    }

    /**
     * @param pos zero-based index into the member list
     * @return the member at that index
     */
    public DataPoint getDataPoint(int pos)
    {
        return this.mDataPoints.elementAt(pos);
    }

    /**
     * Recomputes the within-cluster sum of squares from the members' stored distances.
     *
     * <p>{@link DataPoint#getCurrentEuDt()} holds the plain Euclidean distance (a square
     * root), so each distance is squared before it is added.
     */
    public void calcSumOfSquares()
    {
        int size = this.mDataPoints.size();
        double total = 0;
        for (int i = 0; i < size; i++)
        {
            double distance = this.mDataPoints.elementAt(i).getCurrentEuDt();
            total = total + (distance * distance);
        }
        this.mSumSqr = total;
    }

    /** @return the sum of squares as of the last {@link #calcSumOfSquares()} call */
    public double getSumSqr()
    {
        return this.mSumSqr;
    }

    public String getName()
    {
        return this.mName;
    }

    /** @return this cluster's own member list (not a copy) */
    public Vector getDataPoints()
    {
        return this.mDataPoints;
    }
}
DataPoint.java
package eu.eodigos.kmean;
/**
 * One observation to be clustered: seven numeric features (avg1..avg7), the id of the
 * user it belongs to, the cluster it is currently assigned to, and its Euclidean
 * distance to that cluster's centroid.
 *
 * @author daoger
 * @version 1.0
 */
public class DataPoint
{
    private double avg1;
    private double avg2;
    private double avg3;
    private double avg4;
    private double avg5;
    private double avg6;
    private double avg7;
    private Integer userid;
    private Cluster mCluster;
    private double mEuDt;

    /**
     * Creates a data point that is not yet assigned to any cluster.
     */
    public DataPoint(double avg1, double avg2, double avg3, double avg4, double avg5, double avg6, double avg7,
            Integer userid)
    {
        this.mCluster = null;
        this.userid = userid;
        this.avg1 = avg1;
        this.avg2 = avg2;
        this.avg3 = avg3;
        this.avg4 = avg4;
        this.avg5 = avg5;
        this.avg6 = avg6;
        this.avg7 = avg7;
    }

    /**
     * Assigns this point to a cluster and immediately recomputes the stored distance to
     * that cluster's centroid.
     */
    public void setCluster(Cluster cluster)
    {
        this.mCluster = cluster;
        calcEuclideanDistance();
    }

    /**
     * Recomputes the stored distance to the centroid of the current cluster. Invoked
     * when the point joins a cluster and when a centroid has been recalculated.
     */
    public void calcEuclideanDistance()
    {
        mEuDt = testEuclideanDistance(mCluster.getCentroid());
    }

    /**
     * Computes the Euclidean distance from this point to the given centroid without
     * changing the stored distance.
     *
     * @param c centroid to measure against
     * @return the square root of the summed squared per-feature differences
     */
    public double testEuclideanDistance(Centroid c)
    {
        double sumOfSquares = squaredGap(avg1, c.getAvgC1()) + squaredGap(avg2, c.getAvgC2())
                + squaredGap(avg3, c.getAvgC3()) + squaredGap(avg4, c.getAvgC4())
                + squaredGap(avg5, c.getAvgC5()) + squaredGap(avg6, c.getAvgC6())
                + squaredGap(avg7, c.getAvgC7());
        return Math.sqrt(sumOfSquares);
    }

    /** Squared difference between a feature value and the matching centroid coordinate. */
    private static double squaredGap(double value, double center)
    {
        return Math.pow((value - center), 2);
    }

    public double getAvg1()
    {
        return this.avg1;
    }

    public void setAvg1(double avg1)
    {
        this.avg1 = avg1;
    }

    public double getAvg2()
    {
        return this.avg2;
    }

    public void setAvg2(double avg2)
    {
        this.avg2 = avg2;
    }

    public double getAvg3()
    {
        return this.avg3;
    }

    public void setAvg3(double avg3)
    {
        this.avg3 = avg3;
    }

    public double getAvg4()
    {
        return this.avg4;
    }

    public void setAvg4(double avg4)
    {
        this.avg4 = avg4;
    }

    public double getAvg5()
    {
        return this.avg5;
    }

    public void setAvg5(double avg5)
    {
        this.avg5 = avg5;
    }

    public double getAvg6()
    {
        return this.avg6;
    }

    public void setAvg6(double avg6)
    {
        this.avg6 = avg6;
    }

    public double getAvg7()
    {
        return this.avg7;
    }

    public void setAvg7(double avg7)
    {
        this.avg7 = avg7;
    }

    /** @return the cluster this point is assigned to, or {@code null} if unassigned */
    public Cluster getCluster()
    {
        return this.mCluster;
    }

    /** @return the distance stored by the last {@link #calcEuclideanDistance()} call */
    public double getCurrentEuDt()
    {
        return this.mEuDt;
    }

    /**
     * @return the userid
     */
    public Integer getUserid()
    {
        return this.userid;
    }

    /**
     * @param userid
     *            the userid to set
     */
    public void setUserid(Integer userid)
    {
        this.userid = userid;
    }
}