/**
*
*
*
*
* */
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
/**
 * Small k-means clusterer for comma-separated records.
 *
 * <p>Each input line is expected to look like {@code label,f1,f2,...,fN} where
 * {@code N == featureCount}. Constructing an instance loads the file, picks
 * {@code k} distinct input points as initial centroids, iterates until the
 * sum of squared errors (SSE) stops changing (or drops to the optional
 * threshold), and finally writes one text file per non-empty cluster into the
 * {@code result} directory.
 *
 * <p>Every record and centroid is a {@code Double[featureCount + 1]}: slot 0
 * holds the 1-based record id (0.0 for centroids) and slots
 * {@code 1..featureCount} hold the features. Distance computations ignore
 * slot 0.
 *
 * <p>NOTE: {@link #kCores} is a static field kept for source compatibility, so
 * two instances alive at the same time overwrite each other's centroids.
 */
public class kMeans {
    /** Threshold value meaning "never stop early, iterate until the SSE is stable". */
    private static final double NO_SSE_THRESHOLD = Double.NEGATIVE_INFINITY;

    /** Number of clusters. */
    private final int k;
    /** Path of the comma-separated input file. */
    private final String dataFilePath;
    /** Number of numeric features per record (columns after the label). */
    private final int featureCount;
    /** SSE measured by the most recent {@link #clusterOnce()} call. */
    private double SSE = Double.MAX_VALUE;
    /** Iteration stops once the SSE is less than or equal to this value. */
    private final double SSEthreadhold;

    /** Loaded records; slot 0 is the 1-based record id, the rest are features. */
    List<Double[]> srcData = new ArrayList<Double[]>();
    /** Label (first column) of every loaded record, in load order. */
    List<String> correctClass = new ArrayList<String>();
    /** Current centroids, one row per cluster. Shared by all instances. */
    static Double[][] kCores;
    /** Cluster index to the records currently assigned to that cluster. */
    Map<Integer, List<Double[]>> Cdata = new HashMap<Integer, List<Double[]>>();

    /**
     * Loads the data, clusters it until the SSE stops changing and writes the result files.
     *
     * @param k            number of clusters, must be positive and not exceed the record count
     * @param featureCount number of numeric columns following the label, must be positive
     * @param dataFilePath path of the comma-separated input file
     * @throws IOException              if the input cannot be read or parsed, or the results
     *                                  cannot be written
     * @throws IllegalArgumentException if an argument is out of range
     */
    public kMeans(int k, int featureCount, String dataFilePath) throws IOException {
        this(k, featureCount, dataFilePath, NO_SSE_THRESHOLD);
    }

    /**
     * Same as the three-argument constructor, but additionally stops iterating as soon as the
     * SSE is less than or equal to {@code SEthreadhold}.
     *
     * <p>The threshold is stored before clustering starts; assigning it afterwards would have
     * no effect because clustering runs inside the constructor.
     *
     * @param k            number of clusters, must be positive and not exceed the record count
     * @param featureCount number of numeric columns following the label, must be positive
     * @param dataFilePath path of the comma-separated input file
     * @param SEthreadhold SSE value at or below which the iteration stops early
     * @throws IOException              if the input cannot be read or parsed, or the results
     *                                  cannot be written
     * @throws IllegalArgumentException if an argument is out of range
     */
    public kMeans(int k, int featureCount, String dataFilePath, double SEthreadhold)
            throws IOException {
        if (k <= 0) {
            throw new IllegalArgumentException("k must be positive: " + k);
        }
        if (featureCount <= 0) {
            throw new IllegalArgumentException("featureCount must be positive: " + featureCount);
        }
        if (dataFilePath == null) {
            throw new IllegalArgumentException("dataFilePath must not be null");
        }
        this.k = k;
        this.featureCount = featureCount;
        this.dataFilePath = dataFilePath;
        this.SSEthreadhold = SEthreadhold;
        kCores = new Double[k][featureCount + 1];
        readSrcData();
        initKcoresByRandomFunction();
        Cluster();
    }

    /**
     * Reloads {@link #srcData} and {@link #correctClass} from the input file.
     *
     * @throws UncheckedIOException if the file cannot be read or contains a malformed record
     */
    void initSrcData() {
        try {
            readSrcData();
        } catch (IOException e) {
            throw new UncheckedIOException("Cannot load source data from " + dataFilePath, e);
        }
    }

    /** Reads the input file into {@link #srcData} and {@link #correctClass}. */
    private void readSrcData() throws IOException {
        srcData.clear();
        correctClass.clear();
        try (BufferedReader br = new BufferedReader(new FileReader(dataFilePath))) {
            int lineNo = 0;
            String line;
            while ((line = br.readLine()) != null) {
                lineNo++;
                if (line.trim().isEmpty()) {
                    continue; // tolerate blank lines
                }
                String[] fields = line.split(",");
                if (fields.length != featureCount + 1) {
                    throw new IOException(dataFilePath + ":" + lineNo + ": expected "
                            + (featureCount + 1) + " comma-separated fields but found "
                            + fields.length);
                }
                Double[] record = new Double[featureCount + 1];
                // Slot 0 is the 1-based record id used later to look up the label.
                record[0] = (double) (srcData.size() + 1);
                try {
                    for (int i = 1; i < fields.length; i++) {
                        record[i] = Double.valueOf(fields[i]);
                    }
                } catch (NumberFormatException e) {
                    throw new IOException(dataFilePath + ":" + lineNo
                            + ": non-numeric feature value", e);
                }
                srcData.add(record);
                correctClass.add(fields[0]);
            }
        }
    }

    /**
     * Seeds {@link #kCores} with the features of {@code k} distinct, randomly chosen records.
     *
     * @throws IllegalArgumentException if fewer than {@code k} records are loaded
     */
    void initKcoresByRandomFunction() {
        int pointCount = srcData.size();
        if (k > pointCount) {
            throw new IllegalArgumentException("k (" + k
                    + ") exceeds the number of loaded points (" + pointCount + ")");
        }
        Set<Integer> seeds = new HashSet<Integer>();
        Random rand = new Random();
        for (int i = 0; i < k; i++) {
            int index = rand.nextInt(pointCount);
            // Set.add returns false for an index that was already used: redraw.
            while (!seeds.add(index)) {
                index = rand.nextInt(pointCount);
            }
            Double[] seed = srcData.get(index);
            kCores[i][0] = 0.0; // id slot is unused for centroids
            for (int j = 1; j < featureCount + 1; j++) {
                kCores[i][j] = seed[j];
            }
        }
    }

    /**
     * Runs one iteration: assigns every record to its nearest centroid, recomputes the
     * centroids and updates the stored SSE.
     *
     * @return {@code true} if the SSE differs from the previous iteration, {@code false} if it
     *         is exactly equal (converged)
     * @throws IOException never thrown by the current implementation; kept in the signature
     *                     for compatibility
     */
    boolean clusterOnce() throws IOException {
        Cdata.clear();
        System.out.println(srcData.size());
        for (Double[] point : srcData) {
            int index = findNearest(point);
            List<Double[]> members = Cdata.get(index);
            if (members == null) {
                members = new ArrayList<Double[]>();
                Cdata.put(index, members);
            }
            members.add(point);
        }
        newCores();
        double current = newSSE();
        if (current == SSE) {
            return false;
        }
        SSE = current;
        return true;
    }

    /**
     * Iterates {@link #clusterOnce()} until the SSE no longer changes or is at or below the
     * configured threshold, then writes the clusters to disk.
     *
     * @throws IOException if the result files cannot be written
     */
    void Cluster() throws IOException {
        boolean changed = clusterOnce();
        while (changed && SSE > SSEthreadhold) {
            changed = clusterOnce();
            System.out.println(SSE);
        }
        writeResult2File();
    }

    /**
     * Returns the index of the centroid closest to {@code s}; ties go to the lowest index.
     *
     * @param s record to classify
     * @return index into {@link #kCores}
     */
    int findNearest(Double[] s) {
        double best = Double.MAX_VALUE;
        int index = 0;
        for (int i = 0; i < k; i++) {
            double d = Distance(s, kCores[i]);
            if (d < best) {
                index = i;
                best = d;
            }
        }
        return index;
    }

    /**
     * Parses integer fields of a comma-separated line into a {@code double} array.
     *
     * <p>The result has {@code fields - 2} slots; fields {@code 1 .. fields - 3} are copied
     * into slots {@code 0 .. fields - 4}, so the last slot always stays 0.0.
     * NOTE(review): that looks like an off-by-one, but nothing in this class calls the method,
     * so the intended layout cannot be confirmed and the behaviour is left unchanged.
     *
     * @param s comma-separated line
     * @return parsed values as described above
     */
    double[] split2Array(String s) {
        String[] tep = s.split(",");
        double[] data = new double[tep.length - 2];
        for (int i = 1; i < tep.length - 2; i++) {
            data[i - 1] = Integer.parseInt(tep[i]);
        }
        return data;
    }

    /**
     * Euclidean distance between two records, ignoring slot 0 (the record id).
     *
     * @param a first record
     * @param b second record
     * @return the Euclidean distance over slots {@code 1..length-1}
     * @throws IllegalArgumentException if the arrays differ in length
     */
    double Distance(Double[] a, Double[] b) {
        return Math.sqrt(squaredDistance(a, b));
    }

    /** Squared Euclidean distance over slots {@code 1..length-1}. */
    private double squaredDistance(Double[] a, Double[] b) {
        if (a.length != b.length) {
            // Returning 0.0 here would make a malformed vector look like a perfect match.
            throw new IllegalArgumentException("Distance: data length don't match ("
                    + a.length + " vs " + b.length + ")");
        }
        double sum = 0.0;
        for (int i = 1; i < a.length; i++) {
            double diff = a[i] - b[i];
            sum += diff * diff;
        }
        return sum;
    }

    /**
     * Computes the SSE of the current assignment in {@link #Cdata} against {@link #kCores}.
     *
     * @return sum over all clusters of the squared distance of each member to its centroid
     */
    double newSSE() {
        double total = 0.0;
        for (int i = 0; i < k; i++) {
            List<Double[]> members = Cdata.get(i);
            if (members == null) {
                continue; // empty cluster contributes nothing
            }
            Double[] core = kCores[i];
            for (Double[] point : members) {
                total += squaredDistance(point, core);
            }
        }
        return total;
    }

    /**
     * Replaces the centroid of every non-empty cluster with the mean of its members.
     * Centroids of clusters without members are left untouched.
     */
    void newCores() {
        for (Map.Entry<Integer, List<Double[]>> entry : Cdata.entrySet()) {
            List<Double[]> members = entry.getValue();
            double[] sums = new double[featureCount + 1];
            for (Double[] point : members) {
                for (int j = 1; j < point.length; j++) {
                    sums[j] += point[j];
                }
            }
            int count = members.size();
            Double[] core = kCores[entry.getKey()];
            for (int t = 0; t < sums.length; t++) {
                core[t] = sums[t] / count;
            }
        }
    }

    /**
     * Writes each non-empty cluster to {@code result/<clusterIndex>.txt}, one record per line
     * as {@code label f1 f2 ... } (space separated, trailing space included).
     *
     * @throws IOException if the directory cannot be created or a file cannot be written
     */
    void writeResult2File() throws IOException {
        Files.createDirectories(Paths.get("result"));
        for (Map.Entry<Integer, List<Double[]>> entry : Cdata.entrySet()) {
            String filename = "result//" + entry.getKey().toString() + ".txt";
            // try-with-resources closes (and therefore flushes) the writer.
            try (FileWriter fw = new FileWriter(filename)) {
                for (Double[] dou : entry.getValue()) {
                    StringBuilder line = new StringBuilder();
                    line.append(correctClass.get(dou[0].intValue() - 1)).append(' ');
                    for (int j = 1; j < dou.length; j++) {
                        line.append(dou[j].toString()).append(' ');
                    }
                    line.append('\n');
                    fw.write(line.toString());
                }
            }
        }
    }

    /**
     * Clusters {@code total.txt} (two features per record) into three groups.
     *
     * @param args ignored
     * @throws IOException if the input cannot be read or the results cannot be written
     */
    public static void main(String args[]) throws IOException {
        new kMeans(3, 2, "total.txt");
    }
}
// Kmeans
// [Scraped page footer] Latest recommended article, published 2024-06-04 22:11:29