import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
/**
 * Minimal naive Bayes classifier over categorical, position-aligned features.
 *
 * <p>Training data is line oriented: the first line is a header and is skipped; every
 * following line has the form {@code <class> <feature1> <feature2> ...}, separated by
 * whitespace. Blank lines are ignored.
 *
 * <p>No smoothing is applied: a feature value never seen together with a class drives
 * that class's score to zero.
 */
public class NaiveBeyes {
    /** Feature part of every training row (whitespace-separated values), parallel to {@link #data_tag}. */
    List<String> data_var = new ArrayList<String>();
    /** Class label of every training row, parallel to {@link #data_var}. */
    List<String> data_tag = new ArrayList<String>();

    /**
     * Loads the training set from the default input file.
     *
     * @throws IOException if the file cannot be read or contains a malformed row
     */
    public NaiveBeyes() throws IOException {
        // try-with-resources guarantees the file handle is released even if parsing fails.
        try (BufferedReader br = new BufferedReader(new FileReader("F:/数据挖掘--算法实现/NaiveBeyes算法/input.txt"))) {
            load(br);
        }
    }

    /**
     * Loads the training set from an arbitrary character source, using the same format
     * as the default input file (first line is a header and is skipped).
     *
     * <p>The reader is consumed up to end-of-stream but is not closed; the caller owns it.
     *
     * @param source training data
     * @throws IOException if reading fails or a row has a class label but no features
     */
    public NaiveBeyes(Reader source) throws IOException {
        load(source instanceof BufferedReader ? (BufferedReader) source : new BufferedReader(source));
    }

    /** Parses training rows from {@code reader} into {@link #data_tag} and {@link #data_var}. */
    private void load(BufferedReader reader) throws IOException {
        boolean headerPending = true;
        int lineNo = 0;
        String line;
        while ((line = reader.readLine()) != null) {
            lineNo++;
            if (headerPending) { // the first line only names the columns
                headerPending = false;
                continue;
            }
            String row = line.trim();
            if (row.isEmpty()) {
                continue; // tolerate blank lines, e.g. a trailing newline at the end of the file
            }
            String[] parts = row.split("\\s+", 2);
            if (parts.length < 2) {
                throw new IOException("Malformed training row at line " + lineNo
                        + ": expected \"<class> <features...>\" but got \"" + line + "\"");
            }
            this.data_tag.add(parts[0]);
            this.data_var.add(parts[1]);
        }
    }

    /**
     * Classifies one sample with naive Bayes.
     *
     * <p>For every class {@code C} seen in training, the relative score
     * {@code P(X|C) * P(C)} is computed as the product of the per-feature conditional
     * frequencies times the class prior, printed to standard output, and the class with
     * the strictly highest score is returned. Features are matched by position; a
     * training row that is shorter than the query simply does not match the missing
     * positions.
     *
     * @param var_predict whitespace-separated feature values of the sample to classify
     * @return the predicted class label, or the empty string when there is no training
     *         data or every class scores zero
     * @throws NullPointerException if {@code var_predict} is {@code null}
     */
    public String NaiveBeyes_method(String var_predict) {
        if (var_predict == null) {
            throw new NullPointerException("var_predict must not be null");
        }
        String[] query = var_predict.trim().split("\\s+");

        // Number of training rows per class, i.e. class -> count.
        Map<String, Integer> classCounts = new HashMap<String, Integer>();
        for (String tag : this.data_tag) {
            Integer seen = classCounts.get(tag);
            classCounts.put(tag, seen == null ? 1 : seen + 1);
        }

        // Tokenise every training row once instead of once per (class, feature) pair.
        List<String[]> rows = new ArrayList<String[]>(this.data_var.size());
        for (String raw : this.data_var) {
            rows.add(raw.trim().split("\\s+"));
        }

        String tag_predict = "";
        float max_p = 0;
        for (Map.Entry<String, Integer> entry : classCounts.entrySet()) {
            String tag = entry.getKey();
            int classTotal = entry.getValue();

            float p = 1;
            for (int j = 0; j < query.length; j++) {
                // How often feature j takes the queried value within the current class.
                int count = 0;
                for (int i = 0; i < rows.size(); i++) {
                    String[] row = rows.get(i);
                    if (j < row.length && row[j].equals(query[j]) && this.data_tag.get(i).equals(tag)) {
                        count++;
                    }
                }
                // Multiplying the per-feature frequencies yields P(X|C).
                p = p * ((float) count) / classTotal;
            }
            // Weight by the class prior to obtain the relative score P(X|C) * P(C).
            p = p * ((float) classTotal) / this.data_var.size();

            // Strict comparison: on a tie the class visited first is kept.
            if (max_p < p) {
                max_p = p;
                tag_predict = tag;
            }
            System.out.println("分到" + tag + "的条件概率相对数为:" + p);
        }
        return tag_predict;
    }

    /** Demo entry point: trains from the default file and classifies one sample. */
    public static void main(String[] args) throws IOException {
        NaiveBeyes a = new NaiveBeyes();
        System.out.println("变量预测为类别:" + a.NaiveBeyes_method("youth medium yes fair"));
    }
}
训练样本:
类别 变量
no youth high no fair
no youth high no excellent
yes middle_aged high no fair
yes senior medium no fair
yes senior low yes fair
no senior low yes excellent
yes middle_aged low yes excellent
no youth medium no fair
yes youth low yes fair
yes senior medium yes fair
yes youth medium yes excellent
yes middle_aged medium no excellent
yes middle_aged high yes fair
no senior medium no excellent
输入预测变量:
"youth medium yes fair"
输出结果:
分到no的条件概率相对数为:0.006857143
分到yes的条件概率相对数为:0.028218696
变量预测为类别:yes