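# Pass the feature values to be predicted in as arguments and filter the
# training data by them, which reduces the number of probability computations.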
import numpy as np
from collections import Counter
class Bayes:
    """Naive Bayes classifier over a small built-in two-feature training set.

    The training data is stored column-wise in ``t_data``: row 0 holds
    feature 1, row 1 holds feature 2 and row 2 holds the class label.
    Only the conditional probabilities needed for the queried feature
    values are computed, instead of a full probability table.
    """

    def __init__(self):
        # NumPy coerces this mixed int/str literal to one string dtype, so
        # every cell (features and labels alike) is stored as text.
        self.t_data = np.array([[1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3],
                                ['S', 'M', 'M', 'S', 'S', 'S', 'M', 'M', 'L', 'L', 'L', 'M', 'M', 'L', 'L'],
                                [-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1]])
        # Prior P(y) keyed by class label (as text); filled in by train_1.
        self.p_y = {}

    def train_1(self, a, b):
        """Classify the sample ``(a, b)`` and return the most probable label.

        For every class label y found in the training data this computes the
        prior P(y) and the likelihoods P(x1=a | y) and P(x2=b | y), prints
        them, and picks the label maximising their product. No smoothing is
        applied, so an unseen feature value yields a zero score.

        Args:
            a: Value of feature 1. Compared by its ``str()`` form, so both
                ``2`` and ``'2'`` match the stored data.
            b: Value of feature 2, compared the same way.

        Returns:
            The predicted class label as a string (e.g. ``'-1'`` or ``'1'``).
            Ties go to the label that appears first in the training data.

        Side effects:
            Updates ``self.p_y`` with the class priors and prints the
            intermediate probabilities and the prediction.
        """
        # The stored array contains strings only; normalise the query the
        # same way, otherwise an int such as 2 would never equal '2'.
        a, b = str(a), str(b)

        # Plain-str copies of the three columns keep dictionary keys and the
        # printed output independent of NumPy's scalar types.
        x1_col, x2_col, y_col = ([str(v) for v in row] for row in self.t_data)

        total = len(y_col)
        label_counts = Counter(y_col)
        for label, count in label_counts.items():
            self.p_y[label] = count / total

        # Per-class number of samples whose feature equals the queried value;
        # a Counter returns 0 for classes with no match.
        x1_hits = Counter(y for x1, y in zip(x1_col, y_col) if x1 == a)
        x2_hits = Counter(y for x2, y in zip(x2_col, y_col) if x2 == b)

        p_x1_y = {label: x1_hits[label] / count
                  for label, count in label_counts.items()}
        p_x2_y = {label: x2_hits[label] / count
                  for label, count in label_counts.items()}

        print('self.p_y', self.p_y)
        print('p_x1_y', p_x1_y)
        print('p_x2_y', p_x2_y)

        # Unnormalised posterior: P(y) * P(x1=a | y) * P(x2=b | y).
        PF = {label: self.p_y[label] * p_x1_y[label] * p_x2_y[label]
              for label in label_counts}
        print('PF', PF)

        prediction = max(PF, key=PF.get)
        print(prediction)
        return prediction
# Script entry point: read the two feature values interactively and classify
# the resulting sample with the built-in training data.
if __name__ == '__main__':
    a = input('请输入特征1')
    # The second prompt previously repeated "feature 1"; it asks for feature 2.
    b = input('请输入特征2')
    B = Bayes()
    B.train_1(a, b)