江科大数据挖掘实验三

8.7 下表由雇员数据库的训练数据组成。数据已泛化。例如,age“31-35”表示年龄在31~35之间。对于给定的行,count表示department、status、age和salary在该行具有给定值的元组数。

Department  Status    Age Salary    Count

Sales     Senior   31-35     46K-50K      30

Sales     junior    26-30     26K-30K      40

Sales     junior    31-35     31K-35K      40

systems      junior    21-25     46K-50K      20

systems      Senior   31-35     66K-70K      5

systems      junior    26-30     46K-50K      3

systems      Senior   41-45     66K-70K      3

marketing    Senior   36-40     46K-50K      10

marketing    junior    31-35     41K-45K      4

secretary      Senior   46-50     36K-40K      4

secretary      junior    26-30     26K-30K      6

————————————————

给定一个数据元组,其department、age和salary的属性值分别为“systems”、“26-30”和“46K-50K”,该元组status的朴素贝叶斯分类是什么?

实现:

JavaBean.java

/**
 * One row of the generalized employee training table used by the naive Bayes example.
 *
 * <p>{@code count} is kept as an {@code int} because the classifier sums it directly
 * ({@code total += bean.getCount()}) and builds rows from an already-parsed integer.
 * String-based overloads of the constructor and setter remain for callers that still
 * hold the count as text.
 */
public class JavaBean {

    int age;

    String department;

    String status;

    String salary;

    /** Number of tuples represented by this row. */
    int count;

    /** Creates an empty row; populate it through the setters. */
    public JavaBean() {
    }

    /**
     * Creates a fully populated row.
     *
     * @param age        age value of the row
     * @param department department name
     * @param status     class label of the row
     * @param salary     salary value of the row
     * @param count      number of tuples represented by the row
     */
    public JavaBean(int age, String department, String status, String salary, int count) {
        this.age = age;
        this.department = department;
        this.status = status;
        this.salary = salary;
        this.count = count;
    }

    /**
     * Same as the {@code int} constructor, but accepts the count as decimal text.
     *
     * @throws NumberFormatException if {@code count} is null or not a decimal integer
     */
    public JavaBean(int age, String department, String status, String salary, String count) {
        this(age, department, status, salary, parseCount(count));
    }

    public int getAge() {
        return age;
    }

    public void setAge(int age) {
        this.age = age;
    }

    public String getDepartment() {
        return department;
    }

    public void setDepartment(String department) {
        this.department = department;
    }

    public String getStatus() {
        return status;
    }

    public void setStatus(String status) {
        this.status = status;
    }

    public String getSalary() {
        return salary;
    }

    public void setSalary(String salary) {
        this.salary = salary;
    }

    /** @return number of tuples represented by this row */
    public int getCount() {
        return count;
    }

    public void setCount(int count) {
        this.count = count;
    }

    /**
     * Sets the count from decimal text.
     *
     * @throws NumberFormatException if {@code count} is null or not a decimal integer
     */
    public void setCount(String count) {
        this.count = parseCount(count);
    }

    /** Converts textual counts, tolerating surrounding whitespace. */
    private static int parseCount(String text) {
        if (text == null) {
            throw new NumberFormatException("count must not be null");
        }
        return Integer.parseInt(text.trim());
    }

    @Override
    public String toString() {
        return "JavaBean [age=" + age + ", department=" + department + ", status="
                + status + ", salary=" + salary + ", count="
                + count + "]";
    }

}

TestNB.java

import java.io.BufferedReader;

import java.io.File;

import java.io.FileReader;

import java.util.ArrayList;

public class TestNB {

    /**
     * Training rows shared by the whole class; {@code splitt} appends one bean to it
     * for every line it parses.
     */
    public static ArrayList<JavaBean> list = new ArrayList<JavaBean>();;

    // Incremented once per line read by txt2String.
    static int data_length=0;

    public static void main(String[] args) {

        // 1.读取数据,放入list容器中

        File file = new File("D://test.txt");

        txt2String(file);

        //数据测试样本

        testData(26,"systems","46K");

    }

    // 读取样本数据

    public static void txt2String(File file) {

        try {

            BufferedReader br = new BufferedReader(new FileReader(file));// 构造一个BufferedReader类来读取文件

            String s = null;

            while ((s = br.readLine()) != null) {// 使用readLine方法,一次读一行

                data_length++;

                splitt(s);

            }

            br.close();

        } catch (Exception e) {

            e.printStackTrace();

        }

    }

    // 存入ArrayList中

    public static void splitt(String str){

        String strr = str.trim();

        String[] abc = strr.split("[\\p{Space}]+");

        int age=Integer.parseInt(abc[0]);

        int count=Integer.parseInt(abc[4]);

        JavaBean bean=new JavaBean(age, abc[1], abc[2], abc[3], count);

        list.add(bean);

    }

    // 训练样本,测试

    public static void testData(int age,String department,String salary){

        //训练样本

        int number_senior=0;

        int number_junior=0;

        // age 个数

        int num_age_senior=0;

        int num_age_junior=0;

        // department

        int num_department_senior=0;

        int num_department_junior=0;

        // salary

        int num_salary_senior=0;

        int num_salary_junior=0;

        int data_count=0;

        for(int i=0;i<list.size();i++) {

            JavaBean bb=list.get(i);

            data_count+=bb.getCount();

        }

        //遍历List 获得数据

        for(int i=0;i<list.size();i++){

            JavaBean bb=list.get(i);

            if(bb.getStatus().equals("senior")){ //senior

                number_senior+=bb.getCount();

                if(bb.getDepartment().equals(department)){//department

                    num_department_senior+=bb.getCount();

                }

                if(bb.getSalary().equals(salary)){//salary

                    num_salary_senior+=bb.getCount();

                }

                if(bb.getAge()==age){//age

                    num_age_senior+=bb.getCount();

                }

            }else {//junior

                number_junior+=bb.getCount();

                if(bb.getDepartment().equals(department)){//department

                    num_department_junior+=bb.getCount();

                }

                if(bb.getSalary().equals(salary)){//salary

                    num_salary_junior+=bb.getCount();

                }

                if(bb.getAge()==age){//age

                    num_age_junior+=bb.getCount();

                }

            }

        }

        System.out.println("number_senior:"+number_senior);

        System.out.println("number_junior:"+number_junior);

        System.out.println("num_age_senior:"+num_age_senior);

        System.out.println("num_age_junior:"+num_age_junior);

        System.out.println("num_department_senior:"+num_department_senior);

        System.out.println("num_department_junior:"+num_department_junior);

        System.out.println("num_salary_senior:"+num_salary_senior);

        System.out.println("num_salary_junior:"+num_salary_junior);

        /// 判断

        double nb_senior=(1.0*num_age_senior/number_senior)*(1.0*num_department_senior/number_senior)*(1.0*num_salary_senior/number_senior);

        double nb_junior=(1.0*num_age_junior/number_junior)*(1.0*num_department_junior/number_junior)*(1.0*num_salary_junior/number_junior);

        System.out.println("该数据元组的senior的概率:"+nb_senior);

        System.out.println("该数据元组的junior的概率:"+nb_junior);

        if(nb_senior>nb_junior){

            System.out.println("senior的概率大");

        }else {

            System.out.println("junior的概率大");

        }

    }

}

样本:

结果:

小结:

朴素贝叶斯中的朴素一词的来源就是假设各特征之间相互独立。这一假设使得朴素贝叶斯算法变得简单,但有时会牺牲一定的分类准确率。

首先给出贝叶斯公式:

$$P(A \mid B) = \frac{P(B \mid A)\,P(A)}{P(B)}$$

    换成分类任务的表达式:

$$P(\text{类别} \mid \text{特征}) = \frac{P(\text{特征} \mid \text{类别})\,P(\text{类别})}{P(\text{特征})}$$

     我们最终只需求出 P(类别|特征) 即可,这就相当于完成了分类任务。

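     则,朴素贝叶斯公式为(假设各特征在给定类别的条件下相互独立):

$$P(\text{类别} \mid \text{特征}_1,\dots,\text{特征}_n) \propto P(\text{类别}) \prod_{i=1}^{n} P(\text{特征}_i \mid \text{类别})$$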

  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值