朴素贝叶斯分类器

import pandas as pd
import numpy as np
import math

# Workflow: load the data, compute the prior probabilities, then the
# conditional probabilities, and compare them to reach a verdict.
# Feature encoding:
#   colour (1 dark green, 2 pale white, jet black -- presumably code 3; the
#   original note omits the number); root (1 curled, 2 slightly curled,
#   3 stiff); knock sound (1 dull, 2 muffled, 3 crisp);
#   texture (1 clear, 2 slightly blurry, 3 blurry); navel (1 sunken,
#   2 slightly sunken, 3 flat); touch (1 hard-smooth, 2 soft-sticky)
dataTrain = pd.read_csv("data/dataList.txt")
# The first eight columns are kept as the feature matrix, the ninth column
# as the target vector.
# NOTE(review): the name `input` shadows the builtin of the same name; it is
# kept because other code may refer to it.
input = dataTrain.iloc[:, 0:8]
output = dataTrain.iloc[:, 8]

# Relative-frequency helper
def probability(x, y):
    """Return ``x`` divided by ``y`` as a floating-point ratio.

    ``x`` is promoted to float before dividing, so two integer counts
    never undergo integer division.
    """
    numerator = x * 1.0
    return numerator / y

# Gaussian probability density
def P_continue(x, even, d):
    """Evaluate the normal probability density function at ``x``.

    Parameters:
        x: point at which the density is evaluated.
        even: mean of the distribution.
        d: standard deviation of the distribution (NOT the variance: the
           formula divides by ``d`` and by ``2 * d ** 2``).

    Returns:
        ``1 / (d * sqrt(2*pi)) * exp(-(x - even)**2 / (2 * d**2))``.

    Raises:
        ZeroDivisionError: if ``d`` is a plain Python zero.
    """
    # math.sqrt avoids the `** (1/2)` spelling, whose exponent collapses to 0
    # wherever `/` is integer division.
    coefficient = 1 / (d * math.sqrt(2 * math.pi))
    exponent = -((x - even) ** 2) / (2 * d ** 2)
    # np.exp is used instead of `math.e ** ...`; like the original it also
    # accepts array arguments.
    return coefficient * np.exp(exponent)

# Share of good and bad melons among the labels in y
def P_gua(y):
    """Count the good and bad labels in ``y`` and return their proportions.

    A label equal to 0 counts as bad; every other label counts as good.

    Returns:
        A 4-tuple ``(good_count, bad_count, good_ratio, bad_ratio)`` where
        each ratio is the corresponding count divided by the number of
        labels.
    """
    labels = list(y)
    total = len(labels)
    bad_total = sum(1 for label in labels if label == 0)
    good_total = total - bad_total
    good_ratio = probability(good_total, total)
    bad_ratio = probability(bad_total, total)
    return good_total, bad_total, good_ratio, bad_ratio

# Unpack the result of P_gua, in order: number of good melons, number of bad
# melons, proportion of good melons, proportion of bad melons.
count_good, count_bad, Pg, Pb = P_gua(output)

# Per-feature class-conditional likelihoods, e.g. P(crisp | good melon)
def P_kind(jugde):
    """Compute per-feature likelihoods of a sample under each class.

    For every position ``i`` of ``jugde``:

    * if the value is ``>= 1`` the feature is treated as discrete and the
      likelihood is the fraction of good (label 1) / bad (label 0) training
      rows whose column ``i`` equals that value;
    * otherwise the feature is treated as continuous and the likelihood is
      the Gaussian density of the value, using the per-class mean and
      population standard deviation of column ``i``.

    Reads the module-level ``dataTrain``, ``count_good`` and ``count_bad``
    and calls ``probability`` and ``P_continue``. The last column of
    ``dataTrain`` is used as the class label.
    NOTE(review): assumes labels are exactly 0 (bad) or 1 (good) -- confirm
    against the data file.

    Parameters:
        jugde: sequence of feature values for one sample, ordered like the
            leading columns of ``dataTrain``.

    Returns:
        ``(P_good, P_bad)``: two lists with one likelihood per feature.
    """
    # Positional access (.iloc) instead of `row[i]` / `row[-1]`: integer keys
    # on a label-indexed Series rely on a deprecated positional fallback.
    labels = dataTrain.iloc[:, -1]
    is_good = labels == 1
    is_bad = labels == 0

    # Results are built locally; the original wrote into names that did not
    # exist yet on the first call.
    P_good = [0.0] * len(jugde)
    P_bad = [0.0] * len(jugde)

    for i, value in enumerate(jugde):
        column = dataTrain.iloc[:, i]
        if value >= 1:
            # Discrete feature: count rows in each class sharing this value.
            same_value = column == value
            sum_good = int((same_value & is_good).sum())
            sum_bad = int((same_value & is_bad).sum())
            P_good[i] = probability(sum_good, count_good)
            P_bad[i] = probability(sum_bad, count_bad)
        else:
            # Continuous feature: every row not labelled 1 is treated as bad,
            # as in the original loops.
            good_values = column[is_good]
            bad_values = column[~is_good]
            even_good = good_values.sum() / count_good
            even_bad = bad_values.sum() / count_bad
            # Squared deviations of the TRAINING values from the class mean
            # (not of the query value, which would not be a variance).
            add_good = ((good_values - even_good) ** 2).sum()
            add_bad = ((bad_values - even_bad) ** 2).sum()
            d_good = (add_good / count_good) ** 0.5
            d_bad = (add_bad / count_bad) ** 0.5
            P_good[i] = P_continue(value, even_good, d_good)
            # Diagnostic output kept from the original.
            print(value, even_good, d_good)
            P_bad[i] = P_continue(value, even_bad, d_bad)
    return P_good, P_bad

# Classify one sample as a good or bad melon
test = np.array([1,1,1,1,1,1,0.697,0.460])
P_good, P_bad = P_kind(test)
# Print the likelihoods already computed instead of calling P_kind again.
print((P_good, P_bad))
# Unnormalised posterior of each class: class prior times the product of the
# per-feature likelihoods. The evidence term shared by both classes is
# omitted because it does not affect the comparison.
mul_good, mul_bad = Pg, Pb
for likelihood in P_good:
    mul_good *= likelihood
for likelihood in P_bad:
    mul_bad *= likelihood
if mul_good > mul_bad:
    print("好瓜!")
else:
    print("坏瓜!")

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值