import pandas as pd
import numpy as np
import math
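# Workflow: load the data, compute the prior probabilities, then the
# conditional probabilities, and compare the two classes to reach a verdict.
# Color (1 dark green, 2 pale, 3 jet black -- the "3" is missing in the original note and is inferred from the numbering);
# root (1 curled, 2 slightly curled, 3 stiff); knock sound (1 dull, 2 muffled, 3 crisp)
# Texture (1 clear, 2 slightly blurry, 3 blurry); navel (1 sunken, 2 slightly sunken, 3 flat); touch (1 hard and smooth, 2 soft and sticky)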
# Load the training set from disk. The first eight columns are kept in
# `input` and the ninth column in `output` (presumably the attributes and
# the good/bad label -- `output` is what gets passed to P_gua below).
dataTrain = pd.read_csv("data/dataList.txt")
input = dataTrain.iloc[:, :8]  # NOTE: shadows the builtin input(); name kept for compatibility
output = dataTrain.iloc[:, 8]
# Compute a probability as a ratio of two counts.
def probability(x: float, y: float) -> float:
    """Return ``x / y`` as a floating-point ratio.

    Args:
        x: Numerator, e.g. the number of matching samples.
        y: Denominator, e.g. the total number of samples.

    Returns:
        ``x`` divided by ``y`` using floating-point division.

    Raises:
        ZeroDivisionError: If ``y`` is zero (for plain Python numbers).
    """
    # Multiplying by 1.0 first forces float division even for integer inputs.
    return x * 1.0 / y
# Probability density of a continuous attribute (Gaussian model).
def P_continue(x, even, d):
    """Return the normal (Gaussian) probability density at ``x``.

    Args:
        x: Point at which the density is evaluated.
        even: Mean of the distribution.
        d: Standard deviation of the distribution. The formula multiplies
            by ``d`` and divides by ``2 * d ** 2``, so this is sigma, not
            the variance.

    Returns:
        ``1 / (d * sqrt(2 * pi)) * exp(-(x - even) ** 2 / (2 * d ** 2))``.

    Raises:
        ZeroDivisionError: If ``d`` is zero (for plain Python numbers).
    """
    coefficient = 1.0 / (d * math.sqrt(2 * math.pi))
    exponent = -((x - even) ** 2) / (2 * d ** 2)
    return coefficient * math.exp(exponent)
# Counts and proportions of good and bad melons among the labels y.
def P_gua(y):
    """Count good and bad labels in ``y`` and return their proportions.

    A label equal to ``0`` counts as a bad melon; every other label counts
    as a good melon.

    Args:
        y: Iterable of class labels.

    Returns:
        A tuple ``(count_good, count_bad, p_good, p_bad)`` where the last two
        entries are the counts divided by the total number of labels.

    Raises:
        ZeroDivisionError: If ``y`` is empty.
    """
    # Materialise once so that one-shot iterables can be both counted and
    # measured.
    labels = list(y)
    total = len(labels)
    count_bad = sum(1 for label in labels if label == 0)
    count_good = total - count_bad
    return count_good, count_bad, count_good / total, count_bad / total
# Number of good melons, number of bad melons, proportion of good melons and
# proportion of bad melons among the training labels, as returned by P_gua.
count_good, count_bad, Pg, Pb = P_gua(output)
# Per-attribute conditional likelihoods, e.g. P(crisp sound | good melon).
def P_kind(jugde):
    """Return the class-conditional likelihood of each attribute of a sample.

    Reads the module-level ``dataTrain``, ``count_good`` and ``count_bad``.
    The last column of ``dataTrain`` is used as the class label; column ``i``
    is matched against ``jugde[i]``.

    An attribute value ``>= 1`` is treated as a discrete code: its likelihood
    is the fraction of the class's training rows that carry the same code (no
    smoothing, so an unseen code yields 0). A value ``< 1`` is treated as a
    continuous measurement: its likelihood is the Gaussian density using the
    class's mean and population standard deviation of that column.

    Args:
        jugde: Sequence of attribute values of the sample to evaluate, in the
            same order as the leading columns of ``dataTrain``.

    Returns:
        A tuple ``(P_good, P_bad)`` of two lists; entry ``i`` holds the
        likelihood of ``jugde[i]`` given a good / bad melon respectively.
    """
    labels = dataTrain.iloc[:, -1]
    # Same convention as P_gua: label 0 is a bad melon, anything else is good.
    bad_rows = labels == 0
    good_rows = ~bad_rows

    # Built locally on every call; the results never depend on pre-existing
    # module-level lists.
    P_good, P_bad = [], []
    for i, value in enumerate(jugde):
        column = dataTrain.iloc[:, i]
        good_values = column[good_rows]
        bad_values = column[bad_rows]
        if value >= 1:
            # Discrete attribute: relative frequency of this code per class.
            matches_good = int((good_values == value).sum())
            matches_bad = int((bad_values == value).sum())
            P_good.append(probability(matches_good, count_good))
            P_bad.append(probability(matches_bad, count_bad))
        else:
            # Continuous attribute: the spread is measured over the training
            # values of the class (ddof=0 divides by the class size), not
            # over the value of the sample being classified.
            P_good.append(
                P_continue(value, good_values.mean(), good_values.std(ddof=0))
            )
            P_bad.append(
                P_continue(value, bad_values.mean(), bad_values.std(ddof=0))
            )
    return P_good, P_bad
# Classify one sample as a good or a bad melon.
test = np.array([1, 1, 1, 1, 1, 1, 0.697, 0.460])
# Evaluate the likelihoods once and reuse them for both the printout and the
# decision.
P_good, P_bad = P_kind(test)
print((P_good, P_bad))
# Naive Bayes score of each class: the class prior times the product of the
# per-attribute likelihoods. Only the evidence term P(x) is shared by both
# classes and can be left out; the priors differ and must be included.
mul_good, mul_bad = Pg, Pb
for likelihood in P_good:
    mul_good *= likelihood
for likelihood in P_bad:
    mul_bad *= likelihood
if mul_good > mul_bad:
    print("好瓜!")  # "good melon"
else:
    print("坏瓜!")  # "bad melon"
# Naive Bayes classifier
# (Web-page residue, not part of the program: "latest recommended article published 2024-05-29 18:11:45")