# 朴素贝叶斯 (Naive Bayes)

#!/usr/bin/env python3
# -*- coding:utf-8 -*-
__author__ = 'lbs'

import numpy as np
from collections import Counter

'''
说明: 这里只是对于X是离散型变量做的,对于连续型变量需要计算其相应的概率,对于既有连续型变量又有离散型变量需要分开计算...
'''


class NaiveBayes:
    """Naive Bayes classifier for samples with two discrete features.

    Each training sample is a ``(features, label)`` pair where
    ``features[0]`` and ``features[1]`` are hashable discrete values.
    Probabilities are plain relative frequencies with a tiny constant
    (``np.spacing(1)``) added so that no factor is exactly zero.

    Feature values and labels are compared through their ``str()`` form,
    and ``predict`` returns the label as a string.
    """

    def __init__(self, train_set):
        """Store the training data and collect the observed values.

        Args:
            train_set: sequence of ``(features, label)`` samples.
        """
        self.eps = np.spacing(1)
        self.train_set = train_set
        self.x1_set = {sample[0][0] for sample in self.train_set}
        self.x2_set = {sample[0][1] for sample in self.train_set}
        # Kept as an items view of (label, count) pairs.
        self.y_labels = Counter(sample[1] for sample in self.train_set).items()

    @staticmethod
    def _key(feature_name, value, label):
        """Build the dictionary key ``"<feature>_<value>_<label>"``."""
        return "_".join((feature_name, str(value), str(label)))

    def priori_probability(self):
        """Return ``{str(label): P(label)}`` estimated from the training set."""
        sample_count = len(self.train_set)
        return {str(label): count / sample_count + self.eps
                for label, count in self.y_labels}

    def conditional_probability(self):
        """Return ``{"x<i>_<value>_<label>": P(x<i>=value | label)}``.

        Every combination of an observed feature value and an observed
        label gets an entry; combinations that never occur together get
        the value ``eps``.
        """
        # One pass over the data instead of re-scanning it for every
        # (value, label) combination.
        joint_counts = Counter()
        for sample in self.train_set:
            features, label = sample[0], sample[1]
            joint_counts[(0, features[0], label)] += 1
            joint_counts[(1, features[1], label)] += 1

        y_conditional_probs = {}
        for label, label_count in self.y_labels:
            for x1 in self.x1_set:
                y_conditional_probs[self._key("x1", x1, label)] = (
                    joint_counts[(0, x1, label)] / label_count + self.eps)
            for x2 in self.x2_set:
                y_conditional_probs[self._key("x2", x2, label)] = (
                    joint_counts[(1, x2, label)] / label_count + self.eps)
        return y_conditional_probs

    def predict(self, input_data):
        """Return the most probable label (as a string) for one sample.

        Args:
            input_data: the two feature values, e.g. ``[2, "S"]``.

        Returns:
            ``str(label)`` of the label with the largest posterior score.

        Raises:
            ValueError: if the training set is empty.
        """
        priori_prob = self.priori_probability()
        conditional_prob = self.conditional_probability()
        if not priori_prob:
            raise ValueError("cannot predict: the training set is empty")

        query = (("x1", input_data[0]), ("x2", input_data[1]))
        posterior_probs = {}
        for y_label, prior in priori_prob.items():
            likelihoods = []
            for feature_name, value in query:
                # Look the key up directly rather than parsing stored keys
                # with split("_"), which breaks as soon as a value or a
                # label itself contains an underscore.
                key = self._key(feature_name, value, y_label)
                # A feature value never seen in training has no entry for
                # any label; it is skipped and so does not affect ranking.
                if key in conditional_prob:
                    likelihoods.append(conditional_prob[key])
            posterior_probs[y_label] = prior * np.prod(likelihoods)

        # Ties resolve to the label encountered first.
        return max(posterior_probs.items(), key=lambda item: item[1])[0]


if __name__ == '__main__':
    # Demo: fit the classifier on a 15-sample data set (five samples per
    # x1 value) and classify a single query.
    samples = [
        ([1, "S"], -1), ([1, "M"], -1), ([1, "M"], 1), ([1, "S"], 1), ([1, "S"], -1),
        ([2, "S"], -1), ([2, "M"], -1), ([2, "M"], 1), ([2, "L"], 1), ([2, "L"], 1),
        ([3, "L"], 1), ([3, "M"], 1), ([3, "M"], 1), ([3, "L"], 1), ([3, "L"], -1),
    ]
    classifier = NaiveBayes(samples)

    query = [2, "S"]
    print("result: ", classifier.predict(query))
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值