Kmeans

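/**
 * K-means clustering of comma-separated records.
 *
 * Each input line holds a class label in its first column followed by the
 * numeric feature values. The clusters that are found are written as text
 * files into the "result" directory.
 */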
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;

/**
 * K-means clustering over comma-separated records of the form
 * {@code label,feature1,...,featureN}.
 *
 * <p>Constructing an instance does all the work: it reads the data file,
 * seeds {@code k} centroids from distinct randomly chosen records, iterates
 * until the sum of squared errors (SSE) stops changing or drops to the
 * configured threshold, and finally writes one text file per cluster into
 * the {@code result} directory.
 *
 * <p>Not thread-safe: {@code k}, the SSE and {@link #kCores} are static and
 * therefore shared by every instance.
 */
public class kMeans {

    /** Directory, relative to the working directory, that receives one text file per cluster. */
    private static final String RESULT_DIR = "result";

    // NOTE: k, SSE and kCores are static and re-initialised by every constructor
    // call. They stay static so that code reading kMeans.kCores keeps working.
    private static int k;
    private String dataFilePath;
    private int featureCount;
    private static double SSE = Double.MAX_VALUE;
    private double SSEthreadhold;
    /** Parsed records: index 0 is the 1-based record number, indices 1..featureCount the features. */
    List<Double[]> srcData = new ArrayList<Double[]>();
    /** First column (class label) of every record, in record order. */
    List<String> correctClass = new ArrayList<String>();
    /** Current centroids, one row per cluster, laid out like a record of {@link #srcData}. */
    static Double[][] kCores;
    /** Cluster index mapped to the records currently assigned to that cluster. */
    Map<Integer, List<Double[]>> Cdata = new HashMap<Integer, List<Double[]>>();

    /**
     * Clusters the file and iterates until the SSE no longer changes.
     *
     * @param k            number of clusters, at least 1
     * @param featureCount number of feature columns that follow the label column
     * @param dataFilePath path of the comma-separated data file
     * @throws IOException if the data file cannot be read or a result file cannot be written
     */
    public kMeans(int k, int featureCount, String dataFilePath) throws IOException {
        // The SSE is never negative, so a threshold of 0 means "run until convergence".
        this(k, featureCount, dataFilePath, 0.0);
    }

    /**
     * Clusters the file and stops as soon as the SSE is no greater than
     * {@code SEthreadhold} or no longer changes, whichever happens first.
     *
     * @param k            number of clusters, at least 1
     * @param featureCount number of feature columns that follow the label column
     * @param dataFilePath path of the comma-separated data file
     * @param SEthreadhold SSE value at or below which iteration stops
     * @throws IOException              if the data file cannot be read or a result file cannot be written
     * @throws IllegalArgumentException if {@code k} is not positive, {@code featureCount} is negative,
     *                                  or the file holds fewer than {@code k} records
     */
    public kMeans(int k, int featureCount, String dataFilePath, double SEthreadhold) throws IOException {
        if (k <= 0) {
            throw new IllegalArgumentException("k must be positive: " + k);
        }
        if (featureCount < 0) {
            throw new IllegalArgumentException("featureCount must not be negative: " + featureCount);
        }
        kMeans.k = k;
        this.featureCount = featureCount;
        this.dataFilePath = dataFilePath;
        // Must be set before Cluster() runs; assigning it afterwards has no effect.
        this.SSEthreadhold = SEthreadhold;
        // Static state: forget the SSE left behind by a previously built instance.
        SSE = Double.MAX_VALUE;
        kCores = new Double[k][featureCount + 1];
        loadSrcData();
        initKcoresByRandomFunction();
        Cluster();
    }

    /**
     * Reads the data file into {@link #srcData} and {@link #correctClass},
     * replacing whatever they held before.
     *
     * @throws UncheckedIOException if the file cannot be read or a line has the wrong column count
     */
    void initSrcData() {
        try {
            loadSrcData();
        } catch (IOException e) {
            throw new UncheckedIOException("Cannot load data file: " + dataFilePath, e);
        }
    }

    /** Parses the data file; blank lines are skipped, every other line must have featureCount+1 columns. */
    private void loadSrcData() throws IOException {
        srcData.clear();
        correctClass.clear();
        try (BufferedReader reader = new BufferedReader(new FileReader(dataFilePath))) {
            String line;
            int lineNumber = 0;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] columns = line.split(",");
                if (columns.length != featureCount + 1) {
                    throw new IOException(dataFilePath + ", line " + lineNumber + ": expected "
                            + (featureCount + 1) + " comma-separated columns (label + features) but found "
                            + columns.length);
                }
                Double[] record = new Double[featureCount + 1];
                // Slot 0 is the 1-based record number, used later to look up the label.
                record[0] = (double) (srcData.size() + 1);
                for (int i = 1; i < columns.length; i++) {
                    record[i] = Double.valueOf(columns[i]);
                }
                srcData.add(record);
                correctClass.add(columns[0]);
            }
        }
    }

    /**
     * Seeds every centroid with the features of a distinct, randomly chosen record.
     *
     * @throws IllegalArgumentException if there are fewer records than clusters
     */
    void initKcoresByRandomFunction() {
        int recordCount = srcData.size();
        if (k > recordCount) {
            throw new IllegalArgumentException(
                    "Cannot pick " + k + " distinct initial centroids from " + recordCount + " records");
        }
        Set<Integer> seeds = new HashSet<Integer>();
        Random rand = new Random();
        for (int i = 0; i < k; i++) {
            // nextInt(n) already excludes n, so every record (including the last) is eligible.
            int index = rand.nextInt(recordCount);
            while (!seeds.add(index)) {
                index = rand.nextInt(recordCount);
            }
            Double[] seed = srcData.get(index);
            kCores[i][0] = 0.0; // slot 0 is not a feature; keep it non-null
            for (int j = 1; j < featureCount + 1; j++) {
                kCores[i][j] = seed[j];
            }
        }
    }

    /**
     * Runs one iteration: assigns every record to its nearest centroid,
     * recomputes the centroids and updates the stored SSE.
     *
     * @return {@code true} if the SSE changed, {@code false} once it is stable
     * @throws IOException never thrown here; kept for signature compatibility
     */
    boolean clusterOnce() throws IOException {
        Cdata.clear();
        System.out.println(srcData.size());
        for (Double[] record : srcData) {
            int index = findNearest(record);
            List<Double[]> members = Cdata.get(index);
            if (members == null) {
                members = new ArrayList<Double[]>();
                Cdata.put(index, members);
            }
            members.add(record);
        }
        newCores();

        double currentSse = newSSE();
        if (currentSse == SSE) {
            return false;
        }
        SSE = currentSse;
        return true;
    }

    /**
     * Iterates until the SSE stops changing or is no greater than the
     * threshold, then writes the clusters to disk.
     *
     * @throws IOException if a result file cannot be written
     */
    void Cluster() throws IOException {
        boolean changed = clusterOnce();
        while (changed && SSE > SSEthreadhold) {
            changed = clusterOnce();
            System.out.println(SSE);
        }
        writeResult2File();
    }

    /**
     * Finds the centroid closest to a record.
     *
     * @param s record laid out like the entries of {@link #srcData}
     * @return index of the nearest centroid; the lowest index wins ties
     */
    int findNearest(Double[] s) {
        double best = Double.MAX_VALUE;
        int index = 0;
        for (int i = 0; i < k; i++) {
            double candidate = Distance(s, kCores[i]);
            if (candidate < best) {
                best = candidate;
                index = i;
            }
        }
        return index;
    }

    /**
     * Parses columns {@code 1 .. length-3} of a comma-separated line as
     * integers into an array of {@code length-2} elements; the last element
     * is left at 0.0.
     *
     * NOTE(review): not called anywhere in this class, and the array size
     * and loop bound disagree by one - confirm the intended column range
     * before relying on it.
     *
     * @param s comma-separated line with at least two columns
     * @return the parsed values
     */
    double[] split2Array(String s) {
        String[] columns = s.split(",");
        double[] data = new double[columns.length - 2];
        for (int i = 1; i < columns.length - 2; i++) {
            data[i - 1] = Integer.parseInt(columns[i]);
        }
        return data;
    }

    /**
     * Euclidean distance between two records, ignoring slot 0.
     *
     * @param a first record
     * @param b second record, same length as {@code a}
     * @return the distance over indices {@code 1 .. length-1}
     * @throws IllegalArgumentException if the lengths differ; returning 0 here
     *                                  would make the bad vector look like a perfect match
     */
    double Distance(Double[] a, Double[] b) {
        if (a.length != b.length) {
            throw new IllegalArgumentException(
                    "Error in the Distance: data lengths don't match (" + a.length + " vs " + b.length + ")");
        }
        double sum = 0.0;
        for (int i = 1; i < a.length; i++) {
            double diff = a[i] - b[i];
            sum += diff * diff;
        }
        return Math.sqrt(sum);
    }

    /**
     * Computes the sum of squared distances between every assigned record
     * and the centroid of its cluster.
     *
     * @return the SSE of the current assignment
     */
    double newSSE() {
        double sse = 0.0;
        for (int i = 0; i < k; i++) {
            List<Double[]> members = Cdata.get(i);
            if (members == null) {
                continue; // cluster received no records in this iteration
            }
            Double[] core = kCores[i];
            for (Double[] record : members) {
                double d = Distance(record, core);
                sse += d * d;
            }
        }
        return sse;
    }

    /**
     * Moves each centroid to the mean of the records assigned to it.
     * Clusters without records keep their previous centroid.
     */
    void newCores() {
        for (Map.Entry<Integer, List<Double[]>> entry : Cdata.entrySet()) {
            List<Double[]> members = entry.getValue();
            double[] sums = new double[featureCount + 1];
            for (Double[] record : members) {
                for (int j = 1; j < record.length; j++) {
                    sums[j] += record[j];
                }
            }
            // Every list in Cdata holds at least one record, so count is never 0.
            int count = members.size();
            Double[] core = kCores[entry.getKey()];
            for (int t = 0; t < sums.length; t++) {
                core[t] = sums[t] / count;
            }
        }
    }

    /**
     * Writes every cluster to {@code result/<clusterIndex>.txt}, one record
     * per line as the label followed by the feature values, each followed by
     * a space. The directory is created when it does not exist.
     *
     * @throws IOException if the directory or a file cannot be written
     */
    void writeResult2File() throws IOException {
        File directory = new File(RESULT_DIR);
        if (!directory.isDirectory() && !directory.mkdirs()) {
            throw new IOException("Cannot create output directory: " + directory.getAbsolutePath());
        }
        for (Map.Entry<Integer, List<Double[]>> entry : Cdata.entrySet()) {
            File target = new File(directory, entry.getKey() + ".txt");
            // try-with-resources flushes and closes the writer; without it the output is lost.
            try (FileWriter writer = new FileWriter(target)) {
                for (Double[] record : entry.getValue()) {
                    StringBuilder line = new StringBuilder(correctClass.get(record[0].intValue() - 1));
                    line.append(' ');
                    for (int j = 1; j < record.length; j++) {
                        line.append(record[j]).append(' ');
                    }
                    line.append('\n');
                    writer.write(line.toString());
                }
            }
        }
    }

    /**
     * Clusters {@code total.txt} (two features per record) into three clusters.
     *
     * @param args unused
     * @throws IOException if the data file cannot be read or the results cannot be written
     */
    public static void main(String args[]) throws IOException {
        new kMeans(3, 2, "total.txt");
    }
}
  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值