话不多说,直接上代码,若有帮助,帮忙点赞哦
python版,或其他机器学习算法,可发邮箱:476562571@qq.com
主要实现功能:
特征 二值判别
递归遍历文件目录加载训练数据集
召回率计算
决策树构建
决策树存储(存储为 JSON 文件),需要依赖 com.alibaba 的 fastjson-1.2.7.jar
决策树读取(读取 JSON 文件),需要依赖 com.alibaba 的 fastjson-1.2.7.jar
package com.code.ku.qa.metion.classifier;
import com.alibaba.fastjson.JSONObject;
import com.code.ku.qa.metion.Metion;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.*;
/**
* @Date: 2018/11/15
* @Time: 19:08
* @User: Likf
* @Description:
*/
public class DecisionTreeID3 {
/** Logger */
private static final Logger _LOG = LoggerFactory.getLogger(DecisionTreeID3.class);
public static TreeNode tree = null;
static{
tree = loadTreeFromJsonFile(Metion.Config.getPath("classify\\id3\\tree.json"));
}
/**
 * Creates a new instance. The constructor body is empty and performs no work of its own.
 */
public DecisionTreeID3() {
}
/**
 * Classifies one sample using the class-level {@code tree}.
 *
 * <p>Pure delegation: forwards to the three-argument {@code classify} overload, passing the
 * static {@code tree} field as the first argument.
 *
 * @param labels   passed through unchanged to the three-argument overload
 * @param testData passed through unchanged to the three-argument overload
 * @return whatever the three-argument overload returns for the shared tree
 */
public static String classify(List<String> labels, List<String> testData) {
    String predicted = classify(tree, labels, testData);
    return predicted;
}
/**
 * Computes the Shannon entropy, in bits, of the output labels of a data set.
 *
 * <p>The output label of a row is its last element. Rows are tallied per label, each tally
 * is turned into a relative frequency {@code p}, and the result is the sum of
 * {@code -p * log2(p)} over all distinct labels.
 *
 * @param dataset rows whose last element is the output label; every row must be non-empty
 * @return the entropy of the label distribution; {@code 0.0} when {@code dataset} is empty
 */
public double calChannonEnt(List<List<String>> dataset) {
    final int total = dataset.size();

    // Tally how often each output label occurs.
    Map<String, Integer> labelCounts = new HashMap<>();
    for (List<String> row : dataset) {
        String label = row.get(row.size() - 1);
        Integer seen = labelCounts.get(label);
        labelCounts.put(label, seen == null ? 1 : seen + 1);
    }

    // Math.log is the natural logarithm; dividing by ln(2) converts to base 2.
    final double ln2 = Math.log(2.0);
    double entropy = 0.0;
    for (Integer count : labelCounts.values()) {
        double p = count.doubleValue() / total;
        entropy -= p * (Math.log(p) / ln2);
    }
    return entropy;
}
/**
 * Splits the data set on a single feature value.
 *
 * <p>Selects every row whose entry at {@code fetureIndex} equals {@code value} and returns a
 * copy of each selected row with that entry removed. Rows that cannot be evaluated (a
 * {@code null} row, a row too short to contain {@code fetureIndex}, or a {@code null} entry
 * at that position) are skipped and reported at trace level rather than aborting the split.
 *
 * @param dataset     rows to filter; the input rows are not modified
 * @param fetureIndex zero-based position of the feature to split on
 * @param value       feature value a row must have to be selected
 * @return a new list holding the reduced copies of the matching rows, in input order;
 *         empty when no row matches
 */
private List<List<String>> splitDataSet(List<List<String>> dataset, int fetureIndex, String value) {
    List<List<String>> subDataSet = new ArrayList<>();
    for (List<String> fetures : dataset) {
        // Validate explicitly instead of relying on a blanket catch (Exception):
        // these are exactly the rows that previously ended up in the catch branch.
        boolean malformed = fetures == null
                || fetureIndex < 0
                || fetureIndex >= fetures.size()
                || fetures.get(fetureIndex) == null;
        if (malformed) {
            _LOG.trace("异常特征:"+fetures);
            continue;
        }
        if (!fetures.get(fetureIndex).equals(value)) {
            continue;
        }
        // ArrayList keeps later index-based access on the reduced rows O(1).
        List<String> reduceFetures = new ArrayList<>(fetures.size() - 1);
        reduceFetures.addAll(fetures.subList(0, fetureIndex));
        reduceFetures.addAll(fetures.subList(fetureIndex + 1, fetures.size()));
        subDataSet.add(reduceFetures);
    }
    return subDataSet;
}
/**
* 选取信息增益最大的特征划分数据集
* @param dataS