day67

/**
     ********************
     * Compute distanceToMaster: for each instance, the distance to its master,
     * i.e., the nearest instance that precedes it in the density ordering.
     ********************
     */
    public void computeDistanceToMaster() {
        // For every instance, find its master: the closest instance among
        // those that come earlier in the density ordering. Fills
        // distanceToMaster and masters, and also re-creates
        // instanceStatusArray (all entries 0).
        int tempNumInstances = dataset.numInstances();
        distanceToMaster = new double[tempNumInstances];
        masters = new int[tempNumInstances];
        instanceStatusArray = new int[tempNumInstances];

        // The ordering comes straight from the sort; allocating a placeholder
        // array beforehand would only be thrown away.
        descendantDensities = mergeSortToIndices(densities);

        // The first instance of the ordering has no predecessor. Its distance
        // is set to the maximal one; its entry in masters is never written
        // and therefore keeps the array default 0.
        distanceToMaster[descendantDensities[0]] = maximalDistance;

        for (int i = 1; i < tempNumInstances; i++) {
            int tempCurrent = descendantDensities[i];
            //Initialize.
            distanceToMaster[tempCurrent] = maximalDistance;
            // Only instances ranked before the current one are candidates.
            for (int j = 0; j < i; j++) {
                int tempCandidate = descendantDensities[j];
                double tempDistance = distance(tempCurrent, tempCandidate);
                // Strict comparison: the first candidate wins among ties.
                if (distanceToMaster[tempCurrent] > tempDistance) {
                    distanceToMaster[tempCurrent] = tempDistance;
                    masters[tempCurrent] = tempCandidate;
                }//of if
            }//of for j
        }//of for i
        System.out.println("First compute, masters = " + Arrays.toString(masters));
        System.out.println("descendantDensities = " + Arrays.toString(descendantDensities));
    }//of computeDistanceToMaster

    /**
     ********************
     * Compute priority. Elements with higher priority are more likely to be
     * selected as cluster centers. Currently it is rho * distanceToMaster; it
     * could also be rho^alpha * distanceToMaster.
     ********************
     */
    public void computePriority() {
        // One priority value per instance: density times distance to master.
        int tempSize = dataset.numInstances();
        priority = new double[tempSize];

        int tempIndex = 0;
        while (tempIndex < tempSize) {
            priority[tempIndex] = distanceToMaster[tempIndex] * densities[tempIndex];
            tempIndex++;
        }//of while
    }//of computePriority

    /**
     ********************
     * The block of a node should be the same as that of its master. This
     * recursive method is efficient.
     *
     * @param paraIndex
     *            The index of the given node.
     * @return The cluster index of the current node.
     ********************
     */
    public int coincideWithMaster(int paraIndex) {
        // A node that already carries a cluster index ends the recursion.
        if (clusterIndices[paraIndex] != -1) {
            return clusterIndices[paraIndex];
        }//of if

        // Otherwise inherit the index of the master and remember it, so every
        // node on the path is resolved only once.
        int tempInherited = coincideWithMaster(masters[paraIndex]);
        clusterIndices[paraIndex] = tempInherited;
        return tempInherited;
    }//of coincideWithMaster

    /**
     *************************
     * Split a block in two according to the master tree.
     *
     * @param paraBlock
     *            The given block.
     * @return The two new blocks, where the two most representative instances
     *         serve as the roots.
     *************************
     */
    public int[][] clusterInTwo(int[] paraBlock) {
        // Splits paraBlock into two sub-blocks. The first two entries of the
        // block become the roots (cluster 0 and cluster 1); every other entry
        // takes the cluster of the nearest labeled node on its master chain.
        // The relative order of the entries is preserved in both sub-blocks.

        // Two roots are needed; fail before clusterIndices is touched.
        if (paraBlock.length < 2) {
            throw new IllegalArgumentException(
                    "clusterInTwo requires a block with at least 2 instances, but got " + paraBlock.length);
        }//of if

        //Reinitialize. In fact, only instances in the given block are
        //considered.
        Arrays.fill(clusterIndices, -1);

        //Initialize the cluster number of the two roots.
        clusterIndices[paraBlock[0]] = 0;
        clusterIndices[paraBlock[1]] = 1;

        // coincideWithMaster leaves already labeled nodes untouched and
        // labels the others through their masters.
        for (int tempInstance : paraBlock) {
            coincideWithMaster(tempInstance);
        }//of for tempInstance

        // Count over the block itself (not over all instances) so that the
        // sub-block sizes always agree with the copy loop below.
        int tempFirstBlockCount = 0;
        for (int tempInstance : paraBlock) {
            if (clusterIndices[tempInstance] == 0) {
                tempFirstBlockCount++;
            }//of if
        }//of for tempInstance

        //The sub blocks.
        int[][] resultBlocks = new int[2][];
        resultBlocks[0] = new int[tempFirstBlockCount];
        resultBlocks[1] = new int[paraBlock.length - tempFirstBlockCount];

        // Copy. You can design shorter code when the number of clusters is
        // greater than 2.
        int tempFirstIndex = 0;
        int tempSecondIndex = 0;
        for (int tempInstance : paraBlock) {
            if (clusterIndices[tempInstance] == 0) {
                resultBlocks[0][tempFirstIndex] = tempInstance;
                tempFirstIndex++;
            } else {
                resultBlocks[1][tempSecondIndex] = tempInstance;
                tempSecondIndex++;
            } // Of if
        }//of for tempInstance

        System.out.println("Split (" + paraBlock.length + ") instances " + Arrays.toString(paraBlock) + "\r\n to ("
                + resultBlocks[0].length + " ) instances" + Arrays.toString(resultBlocks[0]) + "\r\n and (" +
                resultBlocks[1].length + ") instances " + Arrays.toString(resultBlocks[1]));
        return resultBlocks;
    }//of clusterInTwo

    /**
     ********************
     * Classify instances in the block by simple voting.
     * 
     * @param paraBlock
     *            The given block.
     ********************
     */
    public void vote(int[] paraBlock){
        // Tally the class values of the queried (status 1) instances.
        int[] tempTally = new int[dataset.numClasses()];
        for (int tempInstance : paraBlock) {
            if (instanceStatusArray[tempInstance] != 1) {
                continue;
            }//of if
            int tempClass = (int) dataset.instance(tempInstance).classValue();
            tempTally[tempClass]++;
        }//of for tempInstance

        // Pick the most frequent class; the lowest index wins among ties.
        int tempWinner = -1;
        int tempWinnerVotes = -1;
        for (int tempClass = 0; tempClass < tempTally.length; tempClass++) {
            if (tempTally[tempClass] > tempWinnerVotes) {
                tempWinnerVotes = tempTally[tempClass];
                tempWinner = tempClass;
            }//of if
        }//of for tempClass

        // Give the winning class to every unprocessed (status 0) instance and
        // mark it as classified (status 2).
        for (int tempInstance : paraBlock) {
            if (instanceStatusArray[tempInstance] != 0) {
                continue;
            }//of if
            predictedLabels[tempInstance] = tempWinner;
            instanceStatusArray[tempInstance] = 2;
        }//of for tempInstance
    }//of vote
    
    /**
     ********************
     * Cluster-based active learning. Prepare the densities, masters and
     * priorities, then start the recursive process on the whole dataset.
     *
     * @param paraRatio
	 *            The ratio of the maximal distance as the dc.
	 * @param paraMaxNumQuery
	 *            The maximal number of queries for the whole dataset.
	 * @param paraSmallBlockThreshold
     *            The small block threshold.
     ********************
     */
    public void clusterBasedActiveLearning(double paraRatio, int paraMaxNumQuery, int paraSmallBlockThreshold) {
        // Store the settings of this run.
        radius = maximalDistance * paraRatio;
        smallBlockThreshold = paraSmallBlockThreshold;
        maxNumQuery = paraMaxNumQuery;

        // -1 marks an instance without a predicted label.
        predictedLabels = new int[dataset.numInstances()];
        Arrays.fill(predictedLabels, -1);

        // Density, master and priority must be computed in this order.
        computeDensitiesGaussian();
        computeDistanceToMaster();
        computePriority();

        // The whole dataset, ordered by priority, is the first block.
        descendantRepresentatives = mergeSortToIndices(priority);
        System.out.println("descendantRepresentatives = " + Arrays.toString(descendantRepresentatives));

        numQuery = 0;
        clusterBasedActiveLearning(descendantRepresentatives);
    }//of clusterBasedActiveLearning

    /**
     ********************
     * Cluster based active learning.
     *
     * @param paraBlock
     *            The given block. This block must be sorted according to the
     *            priority in descending order.
     ********************
     */
    public void clusterBasedActiveLearning(int[] paraBlock){
        System.out.println("clusterBasedActiveLearning for block " + Arrays.toString(paraBlock));

        // Step 1. The label quota of this block is sqrt(size); count how many
        // labels have been queried already.
        final int tempQuota = (int) Math.sqrt(paraBlock.length);
        int tempQueried = 0;
        for (int tempInstance : paraBlock) {
            if (instanceStatusArray[tempInstance] == 1) {
                tempQueried++;
            }//of if
        }//of for tempInstance

        // Step 2. A small block with enough labels is settled by voting.
        boolean tempQuotaReached = tempQueried >= tempQuota;
        boolean tempIsSmall = paraBlock.length <= smallBlockThreshold;
        if (tempQuotaReached && tempIsSmall) {
            System.out.println("" + tempQueried +
                    " instances are queried, vote for block: \r\n" + Arrays.toString(paraBlock));
            vote(paraBlock);
            return;
        }//of if

        // Step 3. Query the leading instances of the block until the quota is
        // met. Once the global budget is used up, fall back to voting.
        for (int i = 0; i < tempQuota; i++) {
            if (numQuery >= maxNumQuery) {
                System.out.println("No more queries are provided, numQuery = " + numQuery +".");
                vote(paraBlock);
                return;
            }//of if

            int tempInstance = paraBlock[i];
            if (instanceStatusArray[tempInstance] != 0) {
                // Not an unhandled instance, no query needed.
                continue;
            }//of if

            instanceStatusArray[tempInstance] = 1;
            predictedLabels[tempInstance] = (int) dataset.instance(tempInstance).classValue();
            numQuery++;
        }//of for i

        // Step 4. The block is pure when the leading instances all share the
        // label of the first one.
        int tempFirstLabel = predictedLabels[paraBlock[0]];
        int tempAgreeing = 0;
        while ((tempAgreeing < tempQuota)
                && (predictedLabels[paraBlock[tempAgreeing]] == tempFirstLabel)) {
            tempAgreeing++;
        }//of while

        if (tempAgreeing == tempQuota) {
            System.out.println("Classify for pure block: " + Arrays.toString(paraBlock));
            // The remaining unhandled instances take the common label.
            for (int i = tempQuota; i < paraBlock.length; i++) {
                int tempInstance = paraBlock[i];
                if (instanceStatusArray[tempInstance] == 0) {
                    predictedLabels[tempInstance] = tempFirstLabel;
                    instanceStatusArray[tempInstance] = 2;
                }//of if
            }//of for i
            return;
        }//of if

        // Step 5. Impure block: split it in two and handle each half
        // recursively, the first half before the second.
        int[][] tempHalves = clusterInTwo(paraBlock);
        clusterBasedActiveLearning(tempHalves[0]);
        clusterBasedActiveLearning(tempHalves[1]);
    }//of clusterBasedActiveLearning

    /**
     ********************
     * Show the statistics information.
     ********************
     */
    @Override
    public String toString(){
        // Summarizes the run: how many instances are in each status and how
        // many predicted labels equal the true class value.
        // Status codes index this array: 0 unhandled, 1 queried, 2 classified.
        int[] tempStatusCounts = new int[3];
        // Kept as double so that the count is printed and divided as a
        // floating point value.
        double tempCorrect = 0;
        int tempNumInstances = dataset.numInstances();
        for (int i = 0; i < tempNumInstances; i++) {
            tempStatusCounts[instanceStatusArray[i]]++;
            if (predictedLabels[i] == (int) dataset.instance(i).classValue()) {
                tempCorrect++;
            }//of if
        }//of for i

        String resultString = "(unhandled, queried, classified = " + Arrays.toString(tempStatusCounts);
        resultString += "\r\nCorrect = " + tempCorrect + ", accuracy = " + (tempCorrect / tempNumInstances);

        return resultString;
    }//of toString

    /**
     ********************
     * The entrance of the program.
     *
     * @param args
     *            Not used now.
     ********************
     */
    public static void main(String[] args){
        final long tempStartMillis = System.currentTimeMillis();
        System.out.println("Starting ALEC.");

        // For a larger experiment, use "D:/mitchelles/data/mushroom.arff"
        // together with a budget of 800 queries.
        Alec tempLearner = new Alec("D:/mitchelles/data/iris.arff");

        // dc ratio 0.1, at most 30 queries, small block threshold 3.
        tempLearner.clusterBasedActiveLearning(0.1, 30, 3);
        System.out.println(tempLearner);

        final long tempElapsedMillis = System.currentTimeMillis() - tempStartMillis;
        System.out.println("Runtime: " + tempElapsedMillis + "ms.");
    }//of main
}//of class Alec

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
明日问题的输入事件与输出事件为: | Input Events | Node Output Event description Node . e0: start program event 1 e7: Welcome message 2 e1: center a valid month 6e8: print today's date 4 e2: enter an invalid month 67| e9: print tomorrow's date 6 e3: enter a valid day 69 e10: "month OK" 39 e4: enter an invalid day 69 e11: "month out of range" 41 e5: enter a valid year 71 e12: "day OK" 4 e6: enter an invalid year 71 e13: "day out of range" 4S e14: "year OK" 54 e15: "year out of range" 5( e16: "Date OK" 6C e17: "please enter a valid date" 62 e18: "enter a month" 6( e19: "enter a day" 68 e20: "enter a year" 70 c21: "Day is month, day, year" 8S 在下表中,ASF-6对应的输入事件为: 输出事件 ASF-7对应的输入事件为: 输出事件 为:_ ASF-8对应的输入事件为:_, 输出事件 为:_ ASF-9对应的输入事件为:_,输 出事件 为:_ Atomic System Function Inputs Outputs L ASF-1 start program e0 e7 | ASF-2 enter a date with an invalid month, valid day and valid year e2, e3,e5 e11,e12,e14,e17 | ASF-3 enter a date with an invalid day, valid month and validyear| e1, c4,e5 e10,e13,e14,e17 | ASE-4 enter a date wih an ivalid year, valid day and valid monh| el,e3, c6 e10,e12, el5,e17| ASF-5 enter a date with valid month, day, and year e1,e3,e5 . e10, e12, e14, e16, c21 | ASIF-6 enter a date with valid month, day and year invalid ASF-7 enter a date with valid day, month and year invalid ASF-8 enter a date with valid year, day and month invalid ASF-9 enter a date with invalid month, day, year
06-01
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值