关于Java实现朴素贝叶斯算法

最新推荐文章于 2024-08-20 00:54:19 发布

ZZ同学

最新推荐文章于 2024-08-20 00:54:19 发布

阅读量1k

点赞数 1

分类专栏： java 机器学习算法文章标签：算法

本文链接：https://blog.csdn.net/qq_38773180/article/details/78306273

版权

java 同时被 2 个专栏收录

21 篇文章 2 订阅

订阅专栏

机器学习算法

7 篇文章 1 订阅

订阅专栏

package naiveBayesian;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
public class NativeBeyes {
	private ArrayList<ArrayList<String>> datas=new ArrayList<ArrayList<String>>();//训练数据集
	private ArrayList<String> test=new ArrayList<String>();//测试数据集	
	private static ArrayList<ArrayList<String>> subdatas=new ArrayList<ArrayList<String>>();//同类别数据集
	public ArrayList<Double> p=new ArrayList<Double>();//不同类别对应概率
	public ArrayList<String> kinds=new ArrayList<String>();//记录种类值
	private ArrayList<Double> p1=new ArrayList<Double>();//不同类别对给定多维向量特征的概率
	public NativeBeyes(ArrayList<ArrayList<String>> datas,ArrayList<String> test) {
		this.datas=datas;
		this.test=test;
	}
	/*
	 * 计算类别种数计算各类别的概率
	 */
	public int sorts(ArrayList<ArrayList<String>> datas,ArrayList<String> test) {
		int a=0,b=0,c=datas.size();//类别种类计数器及首个类别计数
		while(datas.isEmpty()!=true) {
		for(int i=0;i<datas.size();i++) {
			if(datas.get(0).get(datas.get(0).size()-1).equals(datas.get(i).get(datas.get(0).size()-1))==true) {
				this.subdatas.add(datas.get(i));
				b++;
				if(b>=2) {
					datas.remove(i);
					i--;
				}
				
			}
		}
		datas.remove(0);
		p.add((double)b);
		b=0;
		a++;
		}
		for(int i=0;i<a;i++) {
			p.add(p.get(i)/c);
		}
		for(int i=0;i<a;i++) {
			p.remove(0);
		}
		numerator(test);
		return a;
	}
	//***********************************//
	/*
	 * 计算分母（denominator）的值，即相应属性值在对应属性集中的概率值 
	 */
	public double denominator(ArrayList<ArrayList<String>> datas,ArrayList<String> test) {
		double pp=1.0;
		int a=0;
		for(int i=0;i<test.size();i++) {
			for(int j=0;j<datas.size();j++) {
				if(datas.get(j).get(i).equals(test.get(i))==true) {
					a++;
				}
			}
			pp*=(double)a/datas.size();
			a=0;
		}
		return pp;
	}
	//************************************//
	/*
	 * 计算相应类别下不同特征属性值的条件概率(部分分子numerator值)
	 */	
	public void numerator(ArrayList<String> test) {
		double p=1.0;
		int b=0,c=0;
		while(this.subdatas.isEmpty()!=true) {
		for(int i=0;i<this.subdatas.size();i++) {
			if(this.subdatas.get(0).get(this.subdatas.get(0).size()-1).equals(this.subdatas.get(i).get(this.subdatas.get(0).size()-1))==true) {
				b++;
			}
			else {
				break;
			}
		}
		kinds.add(this.subdatas.get(0).get(this.subdatas.get(0).size()-1));
		for(int i=0;i<test.size();i++) {
			for(int j=0;j<b;j++) {
				if(test.get(i).equals(this.subdatas.get(j).get(i))) {
					c++;
				}
			}
			p*=(double)c/b;
			c=0;
		}
		p1.add(p);
		p=1.0;
		String str=this.subdatas.get(0).get(this.subdatas.get(0).size()-1);
		for(int i=0;i<b;i++) {
				this.subdatas.remove(0);
		}
		b=0;
		}
	}
	//******************************************//
	/*
	 * 比较得出概率最大的可能分类
	 */
	
	public int compare(ArrayList<ArrayList<String>> datas,ArrayList<String> test,int c,double q,ArrayList<Double> p,ArrayList<Double> p1) {
		double[] num=new double[c];//1
		int t=0;
		double a=0.0;
		for(int i=0;i<num.length;i++) {
			num[i]=(double)p.get(i)*p1.get(i)/q;
		}
		a=num[0];
		for(int i=0;i<num.length;i++) {
			if(a<num[i]) {
				a=num[i];
				t=i;
			}
		}
		System.out.printf("该分类可信度为：%.2f     ",num[t]);
		return t;
	}
	//********************************************//
	public String predect(ArrayList<ArrayList<String>> datas,ArrayList<String> test) throws IOException {		
			        NativeBeyes beyes=new NativeBeyes(datas,test);
			    	beyes.numerator(test);//p(类别下的条件概率)
			    	double bb=beyes.denominator(datas,test);
			    	String str2=beyes.kinds.get(beyes.compare(datas,test,beyes.sorts(datas,test),bb,beyes.p,beyes.p1));
			    	System.out.println("分类为："+str2);
		            return str2;
	}
}

package naiveBayesian;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;

public class Mian {

	public static void main(String[] args) throws IOException {
		ArrayList<ArrayList<String>> datas=new ArrayList<ArrayList<String>>();//训练数据集
		ArrayList<String> answer=new ArrayList<String>();
		String str=null;
		int count=0,c=0;
		FileInputStream fis = new FileInputStream("C:\\Users\\zfw\\Desktop\\java项目\\datas.txt"); 
        InputStreamReader isr = new InputStreamReader(fis, "UTF-8"); 
        BufferedReader br = new BufferedReader(isr); 
        while((str=br.readLine())!=null) {
        	String[] strs=str.split(",");
        	ArrayList<String> array=new ArrayList<String>();
        	for(int i=0;i<strs.length;i++)
        		array.add(strs[i]);
        	datas.add(array);
        }
        br.close();		
        FileInputStream fis2 = new FileInputStream("C:\\Users\\zfw\\Desktop\\java项目\\test.txt"); 
        InputStreamReader isr2 = new InputStreamReader(fis2, "UTF-8"); 
        BufferedReader br2 = new BufferedReader(isr2); 
        while((str=br2.readLine())!=null) {
        	c++;
        	String[] strs=str.split(",");
        	ArrayList<String> test=new ArrayList<String>();
        	for(int i=0;i<strs.length-1;i++)
        		test.add(strs[i]);
        	NativeBeyes beyes=new NativeBeyes(datas,test);
        	if(beyes.predect(datas,test).equals(strs[strs.length-1])) {
        		count++;
        	}
    		FileInputStream fis1 = new FileInputStream("C:\\Users\\zfw\\Desktop\\java项目\\datas.txt"); 
            InputStreamReader isr1 = new InputStreamReader(fis1, "UTF-8"); 
            BufferedReader br1 = new BufferedReader(isr1); 
            while((str=br1.readLine())!=null) {
            	String[] strs1=str.split(",");
            	ArrayList<String> array=new ArrayList<String>();
            	for(int i=0;i<strs1.length;i++)
            		array.add(strs1[i]);
            	datas.add(array);
            }
            br1.close();	
        }
        br2.close();     
        System.out.printf("正确率为：%.2f",(double)count/datas.size()*100);
        System.out.println("%");
	}
}

关于对朴素贝叶斯算法的理解可参考：

https://mp.weixin.qq.com/s?__biz=MzI4MDYzNzg4Mw==&mid=2247