Day 57 kMeans聚类

获得虚拟中心后, 换成与其最近的点作为实际中心, 再聚类。

代码(更新clustering函数,参考文章):

    /**
     *******************************
     * Clustering.
     *******************************
     */
    public void clustering() {
        int[] tempOldClusterArray = new int[dataset.numInstances()];
        tempOldClusterArray[0] = -1;
        int[] tempClusterArray = new int[dataset.numInstances()];
        Arrays.fill(tempClusterArray, 0);
        double[][] tempCenters = new double[numClusters][dataset.numAttributes() - 1];

        // Step 1. Initialize centers.
        int[] tempRandomOrders = getRandomIndices(dataset.numInstances());
        for (int i = 0; i < numClusters; i++) {
            for (int j = 0; j < tempCenters[0].length; j++) {
                tempCenters[i][j] = dataset.instance(tempRandomOrders[i]).value(j);
            } // Of for j
        } // Of for i

        int[] tempClusterLengths = new int[numClusters];
        while (!Arrays.equals(tempOldClusterArray, tempClusterArray)) {
            System.out.println("New loop ...");
            tempOldClusterArray = tempClusterArray;
            tempClusterArray = new int[dataset.numInstances()];
            Arrays.fill(tempClusterLengths, 0);


            // Step 2.1 Minimization. Assign cluster to each instance.
            int tempNearestCenter;
            double tempNearestDistance;
            double tempDistance;

            for (int i = 0; i < dataset.numInstances(); i++) {
                tempNearestCenter = -1;
                tempNearestDistance = Double.MAX_VALUE;

                for (int j = 0; j < numClusters; j++) {
                    tempDistance = distance(i, tempCenters[j]);
                    if (tempNearestDistance > tempDistance) {
                        tempNearestDistance = tempDistance;
                        tempNearestCenter = j;
                    } // Of if
                } // Of for j
                tempClusterArray[i] = tempNearestCenter;
                tempClusterLengths[tempNearestCenter]++;
            } // Of for i


            // Step 2.2 Mean. Find new centers.

            double[][] tempNewCenters = new double[numClusters][dataset.numAttributes() - 1];
            // Arrays.fill(tempNewCenters, 0);
            for (int i = 0; i < dataset.numInstances(); i++) {
                for (int j = 0; j < tempNewCenters[0].length; j++) {
                    tempNewCenters[tempClusterArray[i]][j] += (dataset.instance(i).value(j)
                            / tempClusterLengths[tempClusterArray[i]]);
                } // Of for j
            } // Of for i

            System.out.println("Now the new centers are: " + Arrays.deepToString(tempNewCenters));
            tempCenters = tempNewCenters;
        } // Of while

        // Step 2.3 Mean. Get new actual centers.
        for (int i = 0; i < numClusters; i++) {
            int tempNearestCenter = -1;
            double tempNearestDistance = Double.MAX_VALUE;
            for (int j = 0; j < dataset.numInstances(); j++) {
                double tempDistance = distance(j, tempCenters[i]);
                if (tempNearestDistance > tempDistance) {
                    tempNearestDistance = tempDistance;
                    tempNearestCenter = j;
                } // Of if
            } // Of for j
            tempCenters[i][0] = dataset.instance(tempNearestCenter).value(0);
            tempCenters[i][1] = dataset.instance(tempNearestCenter).value(1);
            tempCenters[i][2] = dataset.instance(tempNearestCenter).value(2);
            tempCenters[i][3] = dataset.instance(tempNearestCenter).value(3);
        } // Of for i

        // Step 3. Form clusters.
        clusters = new int[numClusters][];
        int[] tempCounters = new int[numClusters];
        for (int i = 0; i < numClusters; i++) {
            clusters[i] = new int[tempClusterLengths[i]];
        } // Of for i

        for (int i = 0; i < tempClusterArray.length; i++) {
            clusters[tempClusterArray[i]][tempCounters[tempClusterArray[i]]] = i;
            tempCounters[tempClusterArray[i]]++;
        } // Of for i

        System.out.println("The clusters are: " + Arrays.deepToString(clusters));
    }// Of clustering

结果:

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值