#!/usr/bin/env python3
# -*- coding:utf-8 -*-
__author__ = 'lbs'
import numpy as np
from collections import Counter
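'''
Note: this implementation only handles discrete feature variables X. Continuous
features would need their own probability estimates, and data that mixes
continuous and discrete features would need the two kinds computed separately.
'''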
class NaiveBayes(object):
    """Naive Bayes classifier for samples with exactly two discrete features.

    Each training sample is ``([x1, x2], label)``. Probabilities are plain
    relative frequencies with ``eps`` (machine epsilon) added, so no estimate
    is exactly zero; this is not Laplace smoothing.

    Feature values and labels are compared through their ``str()`` form, and
    ``predict`` returns the label as a string (e.g. ``"-1"``).
    """

    def __init__(self, train_set):
        """Store the training set and collect feature values and label counts.

        Args:
            train_set: sequence of ``([x1, x2], label)`` samples. Feature
                values and labels must be hashable.
        """
        self.eps = np.spacing(1)
        self.train_set = train_set
        self.x1_set = {sample[0][0] for sample in self.train_set}
        self.x2_set = {sample[0][1] for sample in self.train_set}
        # (label, count) pairs, in order of first appearance in train_set.
        self.y_labels = Counter(sample[1] for sample in self.train_set).items()

    def priori_probability(self):
        """Return ``{str(label): P(label) + eps}`` for every training label."""
        train_set_len = len(self.train_set)
        return {
            str(label): (count / train_set_len + self.eps)
            for label, count in self.y_labels
        }

    def _conditional_table(self):
        """Return ``{(feature_index, str(value), str(label)): P(value | label) + eps}``.

        Tuple keys keep the value and the label apart, so values or labels
        that contain ``"_"`` cannot be confused with the key separator.
        """
        # One pass over the data instead of rescanning it per (value, label).
        joint_counts = Counter()
        for sample in self.train_set:
            features, label = sample[0], sample[1]
            joint_counts[(0, features[0], label)] += 1
            joint_counts[(1, features[1], label)] += 1

        table = {}
        for label, label_count in self.y_labels:
            for index, values in enumerate((self.x1_set, self.x2_set)):
                for value in values:
                    table[(index, str(value), str(label))] = (
                        joint_counts[(index, value, label)] / label_count + self.eps
                    )
        return table

    def conditional_probability(self):
        """Return conditional probabilities keyed ``"x1_<value>_<label>"`` / ``"x2_<value>_<label>"``.

        Every (observed value, label) combination gets an entry; combinations
        that never occur together have probability ``eps``.
        """
        return {
            "x" + str(index + 1) + "_" + value + "_" + label: prob
            for (index, value, label), prob in self._conditional_table().items()
        }

    def predict(self, input_data):
        """Return the most probable label for one query, as a string.

        Args:
            input_data: ``[x1, x2]`` for a single query, e.g. ``[2, "S"]``.

        Returns:
            ``str(label)`` of the label with the largest
            ``P(label) * P(x1 | label) * P(x2 | label)``. A feature value that
            never appeared in training contributes no factor.

        Raises:
            ValueError: if the training set contains no samples.
        """
        priori_prob = self.priori_probability()
        table = self._conditional_table()
        query = (str(input_data[0]), str(input_data[1]))

        posterior_probs = {}
        for y_label, prob in priori_prob.items():
            # Direct lookup instead of splitting string keys on "_", which
            # mis-parsed feature values that themselves contain "_".
            likelihoods = [
                table[(index, value, y_label)]
                for index, value in enumerate(query)
                if (index, value, y_label) in table
            ]
            posterior_probs[y_label] = prob * np.prod(likelihoods)

        if not posterior_probs:
            raise ValueError("cannot predict: the training set is empty")
        # Ties go to the label seen first in the training data.
        return max(posterior_probs.items(), key=lambda item: item[1])[0]
if __name__ == '__main__':
    # Demo: fit on a 15-sample toy set and classify a single query.
    # Each sample is ([x1, x2], label).
    train_data = [
        ([1, "S"], -1),
        ([1, "M"], -1),
        ([1, "M"], 1),
        ([1, "S"], 1),
        ([1, "S"], -1),
        ([2, "S"], -1),
        ([2, "M"], -1),
        ([2, "M"], 1),
        ([2, "L"], 1),
        ([2, "L"], 1),
        ([3, "L"], 1),
        ([3, "M"], 1),
        ([3, "M"], 1),
        ([3, "L"], 1),
        ([3, "L"], -1),
    ]
    test_sample = [2, "S"]
    naive_bayes = NaiveBayes(train_set=train_data)
    result = naive_bayes.predict(input_data=test_sample)
    print("result: ", result)
# Naive Bayes
# (page footer from the scraped article: latest recommended article published 2020-10-23 22:58:18)