使用eclipse 、Java 、weka
1、在weka的安装目录下找到weka.jar和weka-src.jar两个jar包,如下图所示:
2、在eclipse中新建Java项目,然后右键项目选择Build Path,在Libraries标签页里点击“添加外部jar包”(Add External JARs),将上一步中找到的weka.jar和weka-src.jar添加进去,然后点击OK,如下图所示:
3、在新建的Java项目中新建包wekaTest和类J48Test,对分词后的中文文本应用过滤器StringToWordVector(对文本数据进行预处理,将文本转换为词向量),然后使用weka自带的分类算法(RandomForest、J48、NaiveBayes)进行分类,并输出各分类器的正确率(代码注释中给出了输出每个实例对应分类名称的方法),具体实现代码如下:
package wekaTest;
import java.io.File;
import weka.classifiers.Evaluation;
import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.trees.J48;
import weka.classifiers.trees.RandomForest;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ArffLoader;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.StringToWordVector;
/**
 * Demo program: loads an ARFF data set, converts its string attributes to a
 * word vector with {@link StringToWordVector}, trains three Weka classifiers
 * (RandomForest, J48 and NaiveBayes) and prints the accuracy of each one.
 *
 * <p>NOTE: every classifier is evaluated on the very same instances it was
 * trained on, so the printed numbers are training-set accuracies.
 */
public class J48Test {

    /** ARFF file that is read when no path is passed on the command line. */
    private static final String DEFAULT_DATA_PATH = "E:\\data.arff";

    /**
     * Runs the demo.
     *
     * @param args optional; {@code args[0]} is the path of the ARFF file to load.
     *             When absent, {@link #DEFAULT_DATA_PATH} is used.
     * @throws Exception if the file cannot be loaded, or filtering, training or
     *                   evaluation fails. The exception is propagated instead of
     *                   being swallowed so that a failed run ends with a non-zero
     *                   exit status.
     */
    public static void main(String[] args) throws Exception {
        String dataPath = (args != null && args.length > 0) ? args[0] : DEFAULT_DATA_PATH;

        // Load the raw data; the last attribute is used as the class attribute.
        ArffLoader loader = new ArffLoader();
        loader.setFile(new File(dataPath));
        Instances rawData = loader.getDataSet();
        rawData.setClassIndex(rawData.numAttributes() - 1);

        // Pre-process the text with the TF and IDF transforms enabled.
        // Options are set before setInputFormat, as in the original sequence.
        StringToWordVector filter = new StringToWordVector();
        filter.setIDFTransform(true);
        filter.setTFTransform(true);
        filter.setInputFormat(rawData);
        Instances vectorized = Filter.useFilter(rawData, filter);

        // Train the three classifiers on the filtered data.
        RandomForest rfModel = new RandomForest();
        J48 j48Model = new J48();
        NaiveBayes nbModel = new NaiveBayes();
        rfModel.buildClassifier(vectorized);
        j48Model.buildClassifier(vectorized);
        nbModel.buildClassifier(vectorized);

        Evaluation rfEvaluation = new Evaluation(vectorized);
        Evaluation j48Evaluation = new Evaluation(vectorized);
        Evaluation nbEvaluation = new Evaluation(vectorized);

        // Feed every (training) instance to each model and record the prediction.
        // To print the predicted class name of an instance instead, use e.g.
        //   current.classAttribute().value((int) rfModel.classifyInstance(current))
        int instanceCount = vectorized.numInstances();
        for (int i = 0; i < instanceCount; i++) {
            Instance current = vectorized.instance(i);
            rfEvaluation.evaluateModelOnceAndRecordPrediction(rfModel, current);
            j48Evaluation.evaluateModelOnceAndRecordPrediction(j48Model, current);
            nbEvaluation.evaluateModelOnceAndRecordPrediction(nbModel, current);
        }

        // Accuracy = 1 - error rate.
        System.out.println("RandomForest的正确率:" + (1 - rfEvaluation.errorRate()));
        System.out.println("J48的正确率:" + (1 - j48Evaluation.errorRate()));
        System.out.println("NaiveBayes的正确率:" + (1 - nbEvaluation.errorRate()));

        // For more detail, an Evaluation can also produce a summary
        // (toSummaryString()), per-class details (toClassDetailsString())
        // and the confusion matrix (toMatrixString()).
    }
}
然后运行查看结果