本学期的课程需要对数据进行分类,因此我参考网上的资料,用 Weka 的 NaiveBayesMultinomial 搭建了一个多项式朴素贝叶斯分类器,在此记录一下。代码如下:
package Test;
import java.io.File;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.bayes.NaiveBayesMultinomial;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ArffLoader;
//import weka.filters.supervised.instance.SMOTE;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.Filter;
/**
 * Command-line demo that trains a Weka {@link NaiveBayesMultinomial} classifier on one
 * ARFF file and prints evaluation statistics for a second ARFF file.
 *
 * <p>Optional arguments: {@code args[0]} is the training file and {@code args[1]} is the
 * test file. When no arguments are given, both default to {@link #DEFAULT_DATA_FILE}.
 * When only the training file is given, the test file defaults to the training file.
 *
 * <p>Note that evaluating on the same file that was used for training (the default)
 * measures how well the model fits its own training data, not how well it generalizes.
 */
public class Test {
    /** Data file used for both training and testing when no path is given. */
    private static final String DEFAULT_DATA_FILE = "test-best200 -200.arff";

    // The public fields below are never assigned or read inside this class. They are kept
    // unchanged so that any external code referring to them continues to compile.

    /** Presumably the number of instances; unused in this class. */
    public int m_NumInstance;

    /** Presumably the number of attributes; unused in this class. */
    public int m_NumAttribute;

    /** Presumably the number of class labels; unused in this class. */
    public int m_NumClass;

    /**
     * Start index of the j-th attribute.
     */
    public int m_StartIndex[] = null;

    /**
     * Number of times class c occurs.
     */
    public double[] m_CountClass = null;

    /**
     * Number of times attribute value Aj occurs together with class c.
     */
    public double[][] m_CountAttandClass = null;

    /**
     * Trains the classifier on the training file, evaluates it on the test file and prints
     * the summary, the confusion matrix and the per-class details to standard output.
     *
     * @param args optional paths: {@code args[0]} training ARFF file, {@code args[1]} test
     *     ARFF file; see the class comment for the defaults
     * @throws IllegalArgumentException if the two files do not have the same header
     * @throws Exception if a file cannot be read or Weka fails to train or evaluate
     */
    public static void main(String[] args) throws Exception {
        String trainPath = (args != null && args.length > 0) ? args[0] : DEFAULT_DATA_FILE;
        String testPath = (args != null && args.length > 1) ? args[1] : trainPath;

        Instances instancesTrain = loadArff(trainPath);
        Instances instancesTest = loadArff(testPath);

        // A model built on one attribute layout cannot be meaningfully scored on another,
        // so fail early with a clear message instead of producing misleading numbers.
        if (!instancesTrain.equalHeaders(instancesTest)) {
            throw new IllegalArgumentException(
                    "Training file '" + trainPath + "' and test file '" + testPath
                            + "' do not have the same ARFF header");
        }

        Classifier classifier = new NaiveBayesMultinomial();
        classifier.buildClassifier(instancesTrain);

        // The Evaluation object is constructed from the training data and then fed the
        // test instances.
        Evaluation eval = new Evaluation(instancesTrain);
        eval.evaluateModel(classifier, instancesTest);

        System.out.println(eval.toSummaryString("=== Summary ===\n", false));
        System.out.println(eval.toMatrixString("=== Confusion Matrix ===\n"));
        System.out.println(eval.toClassDetailsString());
    }

    /**
     * Loads an ARFF file and marks its last attribute as the class attribute.
     *
     * <p>A fresh loader is used for every call so no reader state is shared between files.
     *
     * @param path path of the ARFF file to read
     * @return the loaded data set with the class index set to the last attribute
     * @throws java.io.IOException if the file cannot be opened or parsed
     */
    private static Instances loadArff(String path) throws java.io.IOException {
        ArffLoader loader = new ArffLoader();
        loader.setFile(new File(path));
        Instances data = loader.getDataSet();
        // The class index is an attribute (column) index, zero-based; the class label is
        // expected to be the last attribute of the file.
        data.setClassIndex(data.numAttributes() - 1);
        return data;
    }
}