day67

最新推荐文章于 2023-12-17 23:41:26 发布
mitchellemin
最新推荐文章于 2023-12-17 23:41:26 发布
阅读量47
点赞数
本文链接：https://blog.csdn.net/mitchellemin/article/details/119257115
版权
/**
     ********************
     * Compute distanceToMaster, the distance to its master.
     ********************
     */
    public void computeDistanceToMaster() {
        // Purpose: for every instance, find the closest instance that comes
        // earlier in the ordering produced from densities, and record both that
        // instance (its master) and the distance to it.
        // Side effects: (re)allocates distanceToMaster, masters and
        // instanceStatusArray, and replaces descendantDensities.
        final int tempNumInstances = dataset.numInstances();
        distanceToMaster = new double[tempNumInstances];
        masters = new int[tempNumInstances];
        instanceStatusArray = new int[tempNumInstances];

        // The result of mergeSortToIndices replaces the field, so allocating a
        // placeholder array beforehand would be wasted work.
        // NOTE(review): presumably the indices are ordered by descending
        // density - confirm against mergeSortToIndices.
        descendantDensities = mergeSortToIndices(densities);

        // The first instance in the ordering has no predecessor: it receives
        // the maximal distance and its masters entry keeps the array default 0.
        // The guard keeps an empty dataset from failing on index 0.
        if (tempNumInstances > 0) {
            distanceToMaster[descendantDensities[0]] = maximalDistance;
        }//of if

        for (int i = 1; i < tempNumInstances; i++) {
            final int tempCurrent = descendantDensities[i];

            // Start from the maximal distance; only strictly smaller distances
            // replace it, so the earliest candidate wins a tie.
            distanceToMaster[tempCurrent] = maximalDistance;
            for (int j = 0; j < i; j++) {
                final int tempCandidate = descendantDensities[j];
                final double tempDistance = distance(tempCurrent, tempCandidate);
                if (distanceToMaster[tempCurrent] > tempDistance) {
                    distanceToMaster[tempCurrent] = tempDistance;
                    masters[tempCurrent] = tempCandidate;
                }//of if
            }//of for j
        }//of for i

        System.out.println("First compute, masters = " + Arrays.toString(masters));
        System.out.println("descendantDensities = " + Arrays.toString(descendantDensities));
    }//of computeDistanceToMaster

    /**
     ********************
     * Compute priority. Elements with higher priority are more likely to be
     * selected as cluster centers. Currently it is rho * distanceToMaster. It
     * could also be rho^alpha * distanceToMaster.
     ********************
     */
    public void computePriority() {
        // One priority value per instance: density multiplied by the distance
        // to the instance's master.
        final int total = dataset.numInstances();
        priority = new double[total];

        int index = 0;
        while (index < total) {
            final double rho = densities[index];
            final double delta = distanceToMaster[index];
            priority[index] = rho * delta;
            index++;
        }//of while
    }//of computePriority

    /**
     ********************
     * The block of a node should be the same as that of its master. This
     * recursive method is efficient because every unlabeled node visited on
     * the way up is labeled as well.
     *
     * @param paraIndex
     *            The index of the given node.
     * @return The cluster index of the current node.
     ********************
     */
    public int coincideWithMaster(int paraIndex) {
        // A value other than -1 means the node already carries a cluster label.
        final int knownLabel = clusterIndices[paraIndex];
        if (knownLabel != -1) {
            return knownLabel;
        }//of if

        // Otherwise inherit the label of the master, resolving the master
        // first, and remember it so later lookups stop here.
        final int inheritedLabel = coincideWithMaster(masters[paraIndex]);
        clusterIndices[paraIndex] = inheritedLabel;
        return inheritedLabel;
    }//of coincideWithMaster

    /**
     *************************
     * Split a block in two according to the master tree.
     *
     * @param paraBlock
     *            The given block.
     * @return The new blocks where the two most representative instances serve
     *         as the roots.
     *************************
     */
    public int[][] clusterInTwo(int[] paraBlock) {
        // Purpose: split paraBlock into two sub-blocks. The first two entries of
        // paraBlock act as the roots of cluster 0 and cluster 1; every other
        // entry takes the label reached by following its masters.
        // Precondition: paraBlock holds at least two instance indices.
        // The relative order of the instances is preserved inside each result.

        // Reinitialize. Only instances of the given block matter afterwards.
        Arrays.fill(clusterIndices, -1);

        // Initialize the cluster number of the two roots.
        clusterIndices[paraBlock[0]] = 0;
        clusterIndices[paraBlock[1]] = 1;

        // Label the remaining instances; coincideWithMaster returns at once for
        // an instance that already has a label.
        for (int tempInstance : paraBlock) {
            coincideWithMaster(tempInstance);
        }//of for tempInstance

        // Count the members of cluster 0 inside the block only. Scanning the
        // whole clusterIndices array would cost O(dataset) per split and would
        // mis-size both sub-blocks if a label were ever set outside the block.
        int tempFirstBlockCount = 0;
        for (int tempInstance : paraBlock) {
            if (clusterIndices[tempInstance] == 0) {
                tempFirstBlockCount++;
            }//of if
        }//of for tempInstance

        //The sub blocks.
        int[][] resultBlocks = new int[2][];
        resultBlocks[0] = new int[tempFirstBlockCount];
        resultBlocks[1] = new int[paraBlock.length - tempFirstBlockCount];

        // Copy. Everything that is not in cluster 0 goes to the second block.
        int tempFirstIndex = 0;
        int tempSecondIndex = 0;
        for (int tempInstance : paraBlock) {
            if (clusterIndices[tempInstance] == 0) {
                resultBlocks[0][tempFirstIndex++] = tempInstance;
            } else {
                resultBlocks[1][tempSecondIndex++] = tempInstance;
            }//of if
        }//of for tempInstance

        System.out.println("Split (" + paraBlock.length + ") instances " + Arrays.toString(paraBlock) + "\r\n to ("
                + resultBlocks[0].length + " ) instances" + Arrays.toString(resultBlocks[0]) + "\r\n and (" +
                resultBlocks[1].length + ") instances " + Arrays.toString(resultBlocks[1]));
        return resultBlocks;
    }//of clusterInTwo

    /**
     ********************
     * Classify instances in the block by simple voting.
     * 
     * @param paraBlock
     *            The given block.
     ********************
     */
    public void vote(int[] paraBlock){
        // Tally the class values of the block members whose status is 1
        // (their label was queried).
        final int[] ballots = new int[dataset.numClasses()];
        for (int member : paraBlock) {
            if (instanceStatusArray[member] != 1) {
                continue;
            }//of if
            final int label = (int) dataset.instance(member).classValue();
            ballots[label]++;
        }//of for member

        // Pick the class with the most ballots; the strict comparison lets the
        // lowest class index win a tie.
        int winner = -1;
        int winnerBallots = -1;
        for (int candidate = 0; candidate < ballots.length; candidate++) {
            if (ballots[candidate] > winnerBallots) {
                winnerBallots = ballots[candidate];
                winner = candidate;
            }//of if
        }//of for candidate

        // Members still at status 0 receive the winning class and move to
        // status 2.
        for (int member : paraBlock) {
            if (instanceStatusArray[member] != 0) {
                continue;
            }//of if
            predictedLabels[member] = winner;
            instanceStatusArray[member] = 2;
        }//of for member
    }//of vote
    
    /**
     ********************
     * Cluster based active learning. Prepare the densities, masters and
     * priorities, then process the whole dataset as one block.
     *
     * @param paraRatio
	 *            The ratio of the maximal distance as the dc.
	 * @param paraMaxNumQuery
	 *            The maximal number of queries for the whole dataset.
	 * @param paraSmallBlockThreshold
     *            The small block threshold.
     ********************
     */
    public void clusterBasedActiveLearning(double paraRatio, int paraMaxNumQuery, int paraSmallBlockThreshold) {
        // Store the settings of this run.
        maxNumQuery = paraMaxNumQuery;
        smallBlockThreshold = paraSmallBlockThreshold;
        radius = maximalDistance * paraRatio;

        // -1 marks an instance that has no predicted label yet.
        predictedLabels = new int[dataset.numInstances()];
        Arrays.fill(predictedLabels, -1);

        // Densities first, then masters, then priorities: the later steps read
        // what the earlier ones stored.
        computeDensitiesGaussian();
        computeDistanceToMaster();
        computePriority();

        // Order all instances by priority and treat them as the initial block.
        descendantRepresentatives = mergeSortToIndices(priority);
        System.out.println("descendantRepresentatives = " + Arrays.toString(descendantRepresentatives));

        numQuery = 0;
        clusterBasedActiveLearning(descendantRepresentatives);
    }//of clusterBasedActiveLearning

    /**
     ********************
     * Cluster based active learning.
     *
     * @param paraBlock
     *            The given block. This block must be sorted according to the
     *            priority in descending order.
     ********************
     */
    public void clusterBasedActiveLearning(int[] paraBlock){
        System.out.println("clusterBasedActiveLearning for block " + Arrays.toString(paraBlock));

        // Step 1. The label budget of a block is the integer part of the square
        // root of its size; count how many members are already queried
        // (status 1).
        final int quota = (int) Math.sqrt(paraBlock.length);
        int alreadyQueried = 0;
        for (int member : paraBlock) {
            if (instanceStatusArray[member] == 1) {
                alreadyQueried++;
            }//of if
        }//of for member

        // Step 2. A small block with enough queried labels is settled by voting.
        final boolean enoughLabels = alreadyQueried >= quota;
        final boolean smallBlock = paraBlock.length <= smallBlockThreshold;
        if (enoughLabels && smallBlock) {
            System.out.println("" + alreadyQueried +
                    " instances are queried, vote for block: \r\n" + Arrays.toString(paraBlock));
            vote(paraBlock);
            return;
        }//of if

        // Step 3. Query the leading members of the block until the quota
        // positions are covered or the global budget is exhausted.
        for (int position = 0; position < quota; position++) {
            if (numQuery >= maxNumQuery) {
                System.out.println("No more queries are provided, numQuery = " + numQuery +".");
                vote(paraBlock);
                return;
            }//of if

            final int member = paraBlock[position];
            if (instanceStatusArray[member] != 0) {
                // Not an unhandled instance, nothing to query.
                continue;
            }//of if

            instanceStatusArray[member] = 1;
            predictedLabels[member] = (int) dataset.instance(member).classValue();
            numQuery++;
        }//of for position

        // Step 4. The block is pure when the leading quota members all share
        // the label of the first member.
        final int referenceLabel = predictedLabels[paraBlock[0]];
        int agreeing = 0;
        while (agreeing < quota && predictedLabels[paraBlock[agreeing]] == referenceLabel) {
            agreeing++;
        }//of while
        final boolean pure = (agreeing == quota);

        if (pure) {
            System.out.println("Classify for pure block: " + Arrays.toString(paraBlock));
            // Every unhandled member behind the queried prefix takes that label.
            for (int position = quota; position < paraBlock.length; position++) {
                final int member = paraBlock[position];
                if (instanceStatusArray[member] == 0) {
                    predictedLabels[member] = referenceLabel;
                    instanceStatusArray[member] = 2;
                }//of if
            }//of for position
            return;
        }//of if

        // Step 5. Impure block: split it in two and handle both halves
        // recursively, first half first.
        final int[][] halves = clusterInTwo(paraBlock);
        clusterBasedActiveLearning(halves[0]);
        clusterBasedActiveLearning(halves[1]);
    }//of clusterBasedActiveLearning

    /**
     ********************
     * Show the statistics information.
     ********************
     */
    @Override
    public String toString(){
        // Purpose: summarize the run as text. The first line lists how many
        // instances are in status 0, 1 and 2 (unhandled, queried, classified);
        // the second line gives the number of instances whose predicted label
        // equals the class value, and that number divided by the dataset size.
        int[] tempStatusCounts = new int[3];
        // Kept as double so the printed count and the division below keep
        // their floating-point form.
        double tempCorrect = 0;
        for (int i = 0; i < dataset.numInstances(); i++) {
            tempStatusCounts[instanceStatusArray[i]]++;
            if (predictedLabels[i] == (int) dataset.instance(i).classValue()) {
                tempCorrect++;
            }//of if
        }//of for i

        String resultString = "(unhandled, queried, classified = " + Arrays.toString(tempStatusCounts);
        resultString += "\r\nCorrect = " + tempCorrect + ", accuracy = " + (tempCorrect / dataset.numInstances());

        return resultString;
    }//of toString

    /**
     ********************
     * The entrance of the program. Runs ALEC once and prints the statistics
     * and the elapsed time.
     *
     * @param args
     *            Optional. When present, args[0] is the path of the ARFF file
     *            to load; otherwise the built-in iris path is used.
     ********************
     */
    public static void main(String[] args){
        // nanoTime is monotonic, so the measured duration is not affected by
        // wall-clock adjustments.
        long tempStart = System.nanoTime();

        System.out.println("Starting ALEC.");
        // Default dataset; pass another ARFF path as the first argument to
        // override it. The mushroom data was previously run with
        // clusterBasedActiveLearning(0.1, 800, 3).
        String arffFilename = "D:/mitchelles/data/iris.arff";
        if (args != null && args.length > 0) {
            arffFilename = args[0];
        }//of if

        Alec tempAlec = new Alec(arffFilename);
        tempAlec.clusterBasedActiveLearning(0.1, 30, 3);
        System.out.println(tempAlec);

        long tempElapsedMillis = (System.nanoTime() - tempStart) / 1000000L;
        System.out.println("Runtime: " + tempElapsedMillis + "ms.");
    }//of main
}//of class Alec
mitchellemin
关注
0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
day67

/** ******************** * Compute distanceToMaster, the distance to its master. ******************** */ public void computeDistanceToMaster() { distanceToMaster = new double[dataset.numInstances()]; masters = new i.
复制链接

扫一扫