Weka开发[5]-半监督算法

转自 Koala++'s blog 感谢原作者


这次介绍一个我现在正在做的半监督算法,因为我只是提出一个框架,半监督算法只是一个工具,可是为了找这个工具也让我花了不少功夫。现在介绍的暂时不是Weka的一部分,但开发者是waikato大学(也就是开发Weka的那个大学)。下载地址是 http://www.cs.waikato.ac.nz/~fracpete/projects/collective-classification/ 。我下载的是Classifiers compatible now with Weka 3.5.8。别的几种能不能用我也不知道。然后我也按作者说的把Weka 3.5.8下载下来了(不知道和以前的版本是否兼容)。使用非常简单:先到Weka目录下,把weka-src.jar解压,再把下载的src.tar.gz在Weka目录下解压;如果提示有文件需要替换就替换,如果没有任何文件被替换,说明你的操作有错。

      然后就可以试一下了,把全部Weka源文件拷贝到你新的工程中去,下面我给出了一个测试的例子,不要太相信我的办法,我还没细看他的代码(分成训练和测试样本的方法有问题,懒得改了,谁改完了,请发给我)。注意一下,他训练时是同时使用训练集和测试集的。如果不明白,可以下载他的一篇论文Using Weighted Nearest Neighbor to Benefit from Unlabeled Data。非常简单,不用害怕。

      补充:Tri-train是属于Multi-View的半监督算法,可以从周志华的网站上下载到,论文和代码都不是很难懂,网址是:http://cs.nju.edu.cn/zhouzh/zhouzh.files/publication/annex/TriTrain.htm,这里不作解释。如果还有别的基于Weka的半监督代码,请告诉我。

 

package semiTest;

 

import java.io.BufferedWriter;

import java.io.FileReader;

import java.io.FileWriter;

import java.io.IOException;

 

import weka.classifiers.Evaluation;

import weka.classifiers.collective.functions.LLGC;

import weka.classifiers.collective.meta.CollectiveBagging;

import weka.classifiers.collective.meta.YATSI;

import weka.classifiers.trees.J48;

import weka.core.Instances;

import weka.core.Utils;

import weka.filters.Filter;

import weka.filters.unsupervised.instance.Resample;

 

public class Test

{

    Instances m_instances = null;

    Instances m_testIns = null;

    Instances m_trainIns = null;

   

    public void getFileInstances( String fileName ) throws Exception

    {

        FileReader frData = new FileReader( fileName );

        m_instances = new Instances( frData );

    }

   

    public void writeToArffFile(String newFilePath,Instances ins) throws IOException

    {

        BufferedWriter writer = new BufferedWriter(new FileWriter(newFilePath));

        writer.write(ins.toString());

        writer.flush();

        writer.close();

    }

   

    public void FilterRemovePercentageTest() throws Exception

    {

        Resample removePercentage =new Resample();

        String[] options = Utils.splitOptions("-Z 10 -no-replacement");

        removePercentage.setOptions(options);

        removePercentage.setInputFormat( m_instances );

        m_trainIns = Filter.useFilterm_instances, removePercentage); 

        writeToArffFile("TrainData.arff"m_trainIns );

       

        options = Utils.splitOptions("-Z 90 -no-replacement");

        removePercentage.setOptions(options);

        removePercentage.setInputFormat( m_instances );

        m_testIns = Filter.useFilterm_instances,removePercentage);

        writeToArffFile("TestData.arff"m_testIns );

       

        m_trainIns.setClassIndex( m_trainIns.numAttributes() - 1 );

        m_testIns.setClassIndex( m_testIns.numAttributes() - 1 );

    }

   

    public void LLGCTest() throws Exception

    {

        System.out.println( " **************LLGC********** " );

       

        LLGC llgc = new LLGC();

        llgc.buildClassifier( m_trainInsm_testIns );

       

        Evaluation eval = new Evaluation( m_trainIns );

        eval.evaluateModel( llgc, m_testIns );

        System.out.println( eval.toSummaryString() );

    }

   

    public void J48Test() throws Exception

    {

        System.out.println( " **************J48********** " );

       

        J48 j48 = new J48();       

        j48.buildClassifier( m_trainIns );

       

        Evaluation eval = new Evaluation( m_trainIns );

        eval.evaluateModel( j48, m_testIns );

        System.out.println( eval.toSummaryString() );  

    }

   

    public void YATSITest() throws Exception

    {

        System.out.println( " **************YATSI********** " );

       

        YATSI yatsi = new YATSI();

        yatsi.buildClassifier( m_trainInsm_testIns );

       

        Evaluation eval = new Evaluation( m_trainIns );

        eval.evaluateModel( yatsi, m_testIns );

        System.out.println( eval.toSummaryString() );  

    }

   

    public void CollectiveEMTest() throws Exception

    {

        System.out.println( " **************EM********** " );

       

        CollectiveBagging bagging = new CollectiveBagging();

        bagging.buildClassifier( m_trainInsm_testIns );

       

        Evaluation eval = new Evaluation( m_trainIns );

        eval.evaluateModel( bagging, m_testIns );

        System.out.println( eval.toSummaryString() );  

    }

   

    public static void main(String[] args) throws Exception

    {

        Test percentage = new Test();

       

        percentage.getFileInstances( "F://Program Files//Weka-3-4//data//soybean.arff");

        percentage.FilterRemovePercentageTest();

       

        percentage.J48Test();

        percentage.YATSITest();

        //percentage.LLGCTest();

    }

}

 


  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
目录列表: 2dplanes.arff abalone.arff ailerons.arff Amazon_initial_50_30_10000.arff anneal.arff anneal.ORIG.arff arrhythmia.arff audiology.arff australian.arff auto93.arff autoHorse.arff autoMpg.arff autoPrice.arff autos.arff auto_price.arff balance-scale.arff bank.arff bank32nh.arff bank8FM.arff baskball.arff bodyfat.arff bolts.arff breast-cancer.arff breast-w.arff breastTumor.arff bridges_version1.arff bridges_version2.arff cal_housing.arff car.arff cholesterol.arff cleveland.arff cloud.arff cmc.arff colic.arff colic.ORIG.arff contact-lenses.arff cpu.arff cpu.with.vendor.arff cpu_act.arff cpu_small.arff credit-a.arff credit-g.arff cylinder-bands.arff delta_ailerons.arff delta_elevators.arff dermatology.arff detroit.arff diabetes.arff diabetes_numeric.arff echoMonths.arff ecoli.arff elevators.arff elusage.arff eucalyptus.arff eye_movements.arff fishcatch.arff flags.arff fried.arff fruitfly.arff gascons.arff glass.arff grub-damage.arff heart-c.arff heart-h.arff heart-statlog.arff hepatitis.arff house_16H.arff house_8L.arff housing.arff hungarian.arff hypothyroid.arff ionosphere.arff iris.2D.arff iris.arff kdd_coil_test-1.arff kdd_coil_test-2.arff kdd_coil_test-3.arff kdd_coil_test-4.arff kdd_coil_test-5.arff kdd_coil_test-6.arff kdd_coil_test-7.arff kdd_coil_train-1.arff kdd_coil_train-3.arff kdd_coil_train-4.arff kdd_coil_train-5.arff kdd_coil_train-6.arff kdd_coil_train-7.arff kdd_el_nino-small.arff kdd_internet_usage.arff kdd_ipums_la_97-small.arff kdd_ipums_la_98-small.arff kdd_ipums_la_99-small.arff kdd_JapaneseVowels_test.arff kdd_JapaneseVowels_train.arff kdd_synthetic_control.arff kdd_SyskillWebert-Bands.arff kdd_SyskillWebert-BioMedical.arff kdd_SyskillWebert-Goats.arff kdd_SyskillWebert-Sheep.arff kdd_UNIX_user_data.arff kin8nm.arff kr-vs-kp.arff labor.arff landsat_test.arff landsat_train.arff letter.arff liver-disorders.arff longley.arff lowbwt.arff lung-cancer.arff lymph.arff machine_cpu.arff mbagrade.arff meta.arff mfeat-factors.arff mfeat-fourier.arff 
mfeat-karhunen.arff mfeat-morphological.arff mfeat-pixel.arff mfeat-zernike.arff molecular-biology_promoters.arff monks-problems-1_test.arff monks-problems-1_train.arff monks-problems-2_test.arff monks-problems-2_train.arff monks-problems-3_test.arff monks-problems-3_train.arff mushroom.arff mv.arff nursery.arff optdigits.arff page-blocks.arff pasture.arff pbc.arff pendigits.arff pharynx.arff pol.arff pollution.arff postoperative-patient-data.arff primary-tumor.arff puma32H.arff puma8NH.arff pwLinear.arff pyrim.arff quake.arff ReutersCorn-test.arff ReutersCorn-train.arff ReutersGrain-test.arff ReutersGrain-train.arff schlvote.arff segment-challenge.arff segment-test.arff segment.arff sensory.arff servo.arff sick.arff sleep.arff solar-flare_1.arff solar-flare_2.arff sonar.arff soybean.arff spambase.arff spectf_test.arff spectf_train.arff spectrometer.arff spect_test.arff spect_train.arff splice.arff sponge.arff squash-stored.arff squash-unstored.arff stock.arff strike.arff supermarket.arff triazines.arff unbalanced.arff vehicle.arff veteran.arff vineyard.arff vote.arff vowel.arff water-treatment.arff waveform-5000.arff weather.nominal.arff weather.numeric.arff white-clover.arff wine.arff wisconsin.arff zoo.arff
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值