数据挖掘算法的调用

        前段时间刚开始入门数据挖掘的时候,师兄需要我调用weka的经典算法,以便在实验时做算法对比。由于当时自己Java没学多久,同时对这个开源软件不熟悉,就上网找资料来学习调用的方法,但几乎找不到这类资料。我这里就稍微补补“漏洞”吧(可能是我搜索技术差没找着 — — |||)。我的专业词汇懂得少,描述不准确的地方还请各位帮忙指出。同时有个别算法调用出来的结果和weka的本地结果不太一致,也请大家多多指教,这篇文章更多是希望起到抛砖引玉的作用。

       

     分类算法: 

     1.调用C4.5

        分类算法我们会额外计算它的分类准确率,实现代码如下,有注释了。分类算法的实现有很多共通之处,往后的几个分类算法就不再逐一指明了:


        必须提醒的是,设置分类属性所在行号是必须的。

import weka.classifiers.*;
import weka.core.Instances;
import weka.core.converters.*;
import weka.classifiers.trees.J48;                                         // C45算法(1)
import java.io.File;
import java.io.IOException;

import javax.swing.*;

public class callC45 
{
	public callC45()
	{}
	
	/**
	 * Trains a C4.5 (Weka J48) decision tree on the training ARFF file and
	 * prints the model plus its classification precision on the test file.
	 * Setting the class attribute index on BOTH datasets is mandatory.
	 *
	 * @throws Exception if the ARFF files cannot be read or training fails
	 */
	public void Main() throws Exception
	{
		J48 m_classifier = new J48();
		
		// Training corpus (this demo reuses the same file for train and test).
		File inputFile = new File("E:\\资料\\数据挖掘\\weka-3-5-8\\data\\cpu.with.vendor.arff");
		ArffLoader atf = new ArffLoader();
		atf.setFile(inputFile);
		Instances instancesTrain = atf.getDataSet();
		
		// Test corpus.
		inputFile = new File("E:\\资料\\数据挖掘\\weka-3-5-8\\data\\cpu.with.vendor.arff");
		atf.setFile(inputFile);
		Instances instancesTest = atf.getDataSet();
		// Class attribute at index 0 (0-based); numAttributes() gives the total count.
		instancesTest.setClassIndex(0);
		instancesTrain.setClassIndex(0);
		
		m_classifier.buildClassifier(instancesTrain);
		System.out.println(m_classifier.toString());
		System.out.println("");
		
		int sum = instancesTest.numInstances();   // number of test instances
		int right = 0;                            // correctly classified instances
		for(int i = 0; i < sum ; i++)
		{
			// The prediction matches the labelled class (the test corpus must
			// carry ground-truth labels for the precision to be meaningful).
			if(m_classifier.classifyInstance(instancesTest.instance(i))==instancesTest.instance(i).classValue())
			{
				right++;
			}
		}
		// Guard against a 0/0 NaN when the test set is empty.
		double precision = (sum == 0) ? 0.0 : (double) right / sum;
		System.out.println("J48 classification precision:" + precision);
	}
	
//	public static void main(String[] args) throws Exception
//	{
//		callC45 a = new callC45();
//		a.Main();
//	}
	
}

         2.调用AdaBoost算法

          

import weka.classifiers.meta.AdaBoostM1;
import weka.classifiers.*;
import weka.core.converters.*;
import weka.core.Instances;
import java.io.*;

public class callAdaBoostM1 
{
	public callAdaBoostM1()
	{
		
	}
	
	/**
	 * Trains an AdaBoostM1 ensemble on the training ARFF file and prints the
	 * model together with its classification precision on the test file.
	 * The class attribute sits at index 0 in both datasets.
	 */
	public void Main() throws Exception
	{
		AdaBoostM1 booster = new AdaBoostM1();
		ArffLoader loader = new ArffLoader();
		
		// Training corpus (the demo feeds the same file to both roles).
		loader.setFile(new File("E:\\资料\\数据挖掘\\weka-3-5-8\\data\\cpu.with.vendor.arff"));
		Instances trainSet = loader.getDataSet();
		
		// Test corpus.
		loader.setFile(new File("E:\\资料\\数据挖掘\\weka-3-5-8\\data\\cpu.with.vendor.arff"));
		Instances testSet = loader.getDataSet();
		testSet.setClassIndex(0);
		trainSet.setClassIndex(0);
		
		double total = testSet.numInstances();
		double hits = 0.0f;
		
		booster.buildClassifier(trainSet);
		System.out.println(booster.toString());
		System.out.println("");
		
		for(int idx = 0; idx < total; idx++)
		{
			// Count a hit when the prediction equals the labelled class.
			if(booster.classifyInstance(testSet.instance(idx)) == testSet.instance(idx).classValue())
			{
				hits++;
			}
		}
		System.out.println("AdaBoostM1 classification precision:"+(hits/total));
		
	}
	
	public static void main(String[] args) throws Exception
	{
		callAdaBoostM1 a = new callAdaBoostM1();
		a.Main();
	}
}

            3.调用朴素bayes算法;

import java.io.*;
import weka.classifiers.*;
import weka.classifiers.bayes.*;
import weka.core.Instances;
import weka.core.converters.*;
import java.io.File;
import java.io.IOException;

public class callbayes
{
	public callbayes()
	{}
	
	/**
	 * Trains a naive Bayes classifier on the labor ARFF file and prints the
	 * model plus its classification precision on the test file. Unlike the
	 * other demos, the class attribute here is the LAST attribute.
	 *
	 * @throws Exception if the ARFF files cannot be read or training fails
	 */
	public void Main() throws Exception
	{
		NaiveBayes m_classifier = new NaiveBayes();     // naive Bayes classifier
		
		// Training corpus.
		File inputFile = new File("E:\\资料\\数据挖掘\\weka-3-5-8\\data\\labor.arff");
		ArffLoader atf = new ArffLoader();
		atf.setFile(inputFile);
		Instances instancesTrain = atf.getDataSet();
		instancesTrain.setClassIndex(instancesTrain.numAttributes()-1);
		
		// Test corpus (same file in this demo).
		inputFile = new File("E:\\资料\\数据挖掘\\weka-3-5-8\\data\\labor.arff");
		atf.setFile(inputFile);
		Instances instancesTest = atf.getDataSet();
		instancesTest.setClassIndex(instancesTest.numAttributes()-1);
		
		int sum = instancesTest.numInstances();   // number of test instances
		int right = 0;                            // correct predictions
		
		m_classifier.buildClassifier(instancesTrain);
		System.out.println(m_classifier.toString());
		System.out.println("");
		for(int i = 0; i < sum ; i++)
		{
			if(m_classifier.classifyInstance(instancesTest.instance(i)) == instancesTest.instance(i).classValue())
			{
				right++;
			}
		}
		
		// Fixed the misspelled "Navisbayes" label; guard against an empty test set.
		double precision = (sum == 0) ? 0.0 : (double) right / sum;
		System.out.println("NaiveBayes classification precision:" + precision);
		
	}
	
//	public static void main(String[] args) throws Exception
//	{
//		callbayes a = new callbayes();
//		a.Main();
//	}
	
}

             4.调用KNN算法:

import weka.classifiers.*;
import weka.classifiers.lazy.IBk;
import weka.core.converters.*;
import weka.core.Instances;
import java.io.*;

public class callKNN 
{
	public callKNN()
	{}
	
	/**
	 * Runs a k-nearest-neighbour classifier (Weka IBk, k = 3) on the ARFF
	 * data and prints k, the model description and the precision measured
	 * on the test set. Class attribute index 0 in both datasets.
	 */
	public void Main() throws Exception
	{
		IBk knn = new IBk(3);    // k = 3 neighbours
		ArffLoader loader = new ArffLoader();
		
		// Training corpus.
		loader.setFile(new File("E:\\资料\\数据挖掘\\weka-3-5-8\\data\\cpu.with.vendor.arff"));
		Instances trainSet = loader.getDataSet();
		
		// Test corpus (identical file in this demo).
		loader.setFile(new File("E:\\资料\\数据挖掘\\weka-3-5-8\\data\\cpu.with.vendor.arff"));
		Instances testSet = loader.getDataSet();
		testSet.setClassIndex(0);
		trainSet.setClassIndex(0);
		
		double total = testSet.numInstances();
		double hits = 0.0f;
		
		knn.buildClassifier(trainSet);
		System.out.println("The k is : "+knn.getKNN());
		System.out.println("");
		System.out.println(knn.toString());
		System.out.println("");
		
		for(int idx = 0; idx < total; idx++)
		{
			// A hit means the prediction matches the labelled class.
			if(knn.classifyInstance(testSet.instance(idx)) == testSet.instance(idx).classValue())
			{
				hits++;
			}
		}
		System.out.println("KNN classification precision:"+(hits/total));
	}
	
	public static void main(String[] args) throws Exception
	{
		callKNN a = new callKNN();
		a.Main();
	}
}

            5.调用ID3算法(注意:weka的Id3类实现的是ID3算法,并不是CART)

import weka.classifiers.*;
import weka.classifiers.trees.Id3;
import weka.core.Instances;
import weka.core.converters.*;
import java.io.*;

public class callId3
{
	public callId3()
	{}
	
	/**
	 * Trains an ID3 decision tree on the contact-lenses ARFF file and prints
	 * the model plus its classification precision on the test file. The
	 * class attribute sits at index 0 in both datasets.
	 */
	public void Main() throws Exception
	{
		Id3 tree = new Id3();
		ArffLoader loader = new ArffLoader();
		
		// Training corpus.
		loader.setFile(new File("E:\\资料\\数据挖掘\\weka-3-5-8\\data\\contact-lenses.arff"));
		Instances trainSet = loader.getDataSet();
		
		// Test corpus (same file in this demo).
		loader.setFile(new File("E:\\资料\\数据挖掘\\weka-3-5-8\\data\\contact-lenses.arff"));
		Instances testSet = loader.getDataSet();
		testSet.setClassIndex(0);
		
		double total = testSet.numInstances();
		double hits = 0.0f;
		
		trainSet.setClassIndex(0);
		tree.buildClassifier(trainSet);
		System.out.println(tree.toString());
		System.out.println("");
		
		for(int idx = 0; idx < total; idx++)
		{
			// A hit means the prediction matches the labelled class.
			if(tree.classifyInstance(testSet.instance(idx)) == testSet.instance(idx).classValue())
			{
				hits++;
			}
		}
		
		System.out.println("Id3 classification precision:"+(hits/total));
	}
	
//	public static void main(String[] args) throws Exception
//	{
//		callId3 a = new callId3();
//		a.Main();
//	}
	
}

        聚类算法:

        主要是调用了EM算法和KM算法(SVM属于分类算法,由于还不会用libsvm,就先放着了)。聚类算法的调用与分类算法的不同之处就是不必设置分类属性行号:

        1.调用EM算法;

         

import weka.clusterers.EM;
import weka.core.Instances;
import weka.core.converters.*;
import weka.clusterers.*;
import java.io.*;

public class callEM 
{
	public callEM()
	{
	}
	
	/**
	 * Builds an EM clusterer on the cpu ARFF data, then prints the number of
	 * clusters, the model description and each cluster's prior probability.
	 * Clustering requires no class attribute index.
	 *
	 * @throws Exception if the ARFF file cannot be read or clustering fails
	 */
	public void Main() throws Exception
	{
		EM m_cluster = new EM();
		File inputFile = new File("E:\\资料\\数据挖掘\\weka-3-5-8\\data\\cpu.arff"); 
		ArffLoader arf = new ArffLoader();
		arf.setFile(inputFile);
		Instances instancesTrain = arf.getDataSet();
		
		// NOTE: the original demo also loaded a "test" dataset here, but it
		// was never used by the clusterer, so that dead file read was removed.
		
		m_cluster.buildClusterer(instancesTrain);
		System.out.println("The number of cluster : "+m_cluster.numberOfClusters());
		int num = m_cluster.numberOfClusters();
		System.out.println("");
		System.out.println(m_cluster.toString());
		System.out.println("");
		
		// Per-cluster prior probabilities estimated by EM.
		double[] predict = m_cluster.clusterPriors();
		for(int i = 0; i<num ; i++)
		{
			System.out.println("第  "+i+" 个 聚类的先验为 : "+predict[i]);
		}
	}
	
	public static void main(String[] args) throws Exception
	{
		callEM a = new callEM();
		a.Main();
	}
}

             2.调用KM算法;

import weka.clusterers.*;
import weka.core.converters.*;
import weka.core.Instances;
import java.io.*;

public class callKM
{
	public callKM()
	{
		
	}
	
	/**
	 * Builds a SimpleKMeans clusterer on the cpu ARFF data, then prints the
	 * number of clusters, the model description and each cluster's size and
	 * proportion. Clustering requires no class attribute index.
	 *
	 * @throws Exception if the ARFF file cannot be read or clustering fails
	 */
	public void Main() throws Exception
	{
		SimpleKMeans m_cluster = new SimpleKMeans();
		File inputFile = new File("E:\\资料\\数据挖掘\\weka-3-5-8\\data\\cpu.arff"); 
		ArffLoader arf = new ArffLoader();
		arf.setFile(inputFile);
		Instances instancesTrain = arf.getDataSet();
		
		// NOTE: the original demo also loaded a "test" dataset here, but it
		// was never used by the clusterer, so that dead file read was removed.
		
		m_cluster.buildClusterer(instancesTrain);
		
		System.out.println("The number of cluster : "+m_cluster.numberOfClusters());
		int num = m_cluster.numberOfClusters();
		System.out.println("");
		System.out.println(m_cluster.toString());
		System.out.println("");
		
		// Total instance count = sum of the per-cluster sizes.
		int[] size = m_cluster.getClusterSizes();
		int sum = 0;
		for(int i = 0; i<num ; i++)
		{
			sum += size[i];
		}
		for(int i = 0; i<num ; i++)
		{
			System.out.println("第  "+i+" 个 聚类的大小为 : "+size[i]+"   所占比例为 : "+(double)size[i]/(double)sum);
		}
		
	}
	
	public static void main(String[] args) throws Exception
	{
		callKM a = new callKM();
		a.Main();
	}
}

           关联规则算法

           只是调用了Apriori算法,调用方法和聚类算法的基本一样了。。。

           1.调用Apriori算法


            

import weka.associations.*;
import weka.core.converters.*;
import weka.core.Instances;
import java.io.*;

public class callApriori
{
	public callApriori()
	{
		
	}
	
	/**
	 * Mines association rules from the contact-lenses ARFF data with the
	 * Apriori algorithm and prints the rule count followed by the rules.
	 * Like clustering, no class attribute index is needed.
	 *
	 * @throws Exception if the ARFF file cannot be read or mining fails
	 */
	public void Main() throws Exception
	{
		Apriori m_association = new Apriori();
		File inputFile = new File("E:\\资料\\数据挖掘\\weka-3-5-8\\data\\contact-lenses.arff");
		ArffLoader arf = new ArffLoader();
		arf.setFile(inputFile);
		Instances instancesTrain = arf.getDataSet();
		
		// NOTE: the original demo also loaded a "test" dataset here, but
		// association mining never used it, so that dead file read was removed.
		
		m_association.buildAssociations(instancesTrain);
		System.out.println("The Number of Rules : "+m_association.getNumRules());
		System.out.println(m_association.toString());
		System.out.println("");
		
	}
	
//	public static void main(String[] args) throws Exception
//	{
//		callApriori a = new callApriori();
//		a.Main();
//	}
}

              最后要说明的一点是调用算法的方法很多,需要调用的函数应“量身定调用”,切忌千篇一律~~

           

  • 0
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值