朴素贝叶斯分类器

朴素贝叶斯也称为笨蛋贝叶斯,其思想简单,容易理解

简单理解就是把数据的每一个属性都当作相互独立的分布

例如属性颜色有青绿,乌黑,浅白,则分别求出好瓜和坏瓜中三种颜色的概率,对于离散属性可以直接求其概率,对于连续的属性值可以通过正态分布将其转化为离散值。

这样对所有的属性概率求过以后,对于一个任意样本,例:

A:(青绿,蜷缩,浊响,清晰,凹陷,硬滑,密度,含糖率)

有P(A|好)= P(青绿|好)*P(蜷缩|好)*P(浊响|好)*P(清晰|好)*P(凹陷|好)*P(硬滑|好)*P(密度|好)*P(含糖率|好)

P(A|坏)= P(青绿|坏)*P(蜷缩|坏)*P(浊响|坏)*P(清晰|坏)*P(凹陷|坏)*P(硬滑|坏)*P(密度|坏)*P(含糖率|坏)

最后比较好瓜和坏瓜的概率,谁大就归为哪一类

代码如下:

# Watermelon data set. Columns 0-4 are categorical attributes with three
# distinct values each, column 5 is categorical with two values, column 6 is
# the density, column 7 the sugar content and column 8 the label
# (1 = good melon, 0 = bad melon).
data = [
    ["青绿", "蜷缩", "浊响", "清晰", "凹陷", "硬滑", 0.697, 0.460, 1],
    ["乌黑", "蜷缩", "沉闷", "清晰", "凹陷", "硬滑", 0.774, 0.376, 1],
    ["乌黑", "蜷缩", "浊响", "清晰", "凹陷", "硬滑", 0.634, 0.264, 1],
    ["青绿", "蜷缩", "沉闷", "清晰", "凹陷", "硬滑", 0.608, 0.318, 1],
    ["浅白", "蜷缩", "浊响", "清晰", "凹陷", "硬滑", 0.556, 0.215, 1],
    ["青绿", "稍蜷", "浊响", "清晰", "稍凹", "软粘", 0.403, 0.237, 1],
    ["乌黑", "稍蜷", "浊响", "稍糊", "稍凹", "软粘", 0.481, 0.149, 1],
    ["乌黑", "蜷缩", "浊响", "清晰", "稍凹", "硬滑", 0.437, 0.211, 1],
    ["乌黑", "稍蜷", "沉闷", "稍糊", "稍凹", "硬滑", 0.666, 0.091, 0],
    ["青绿", "硬挺", "清脆", "清晰", "平坦", "软粘", 0.243, 0.267, 0],
    ["浅白", "硬挺", "清脆", "模糊", "平坦", "硬滑", 0.245, 0.057, 0],
    ["浅白", "蜷缩", "浊响", "稍糊", "平坦", "软粘", 0.343, 0.099, 0],
    ["青绿", "稍蜷", "浊响", "稍糊", "凹陷", "硬滑", 0.639, 0.161, 0],
    ["浅白", "稍蜷", "沉闷", "稍糊", "凹陷", "硬滑", 0.657, 0.198, 0],
    ["乌黑", "稍蜷", "浊响", "清晰", "稍凹", "软粘", 0.360, 0.370, 0],
    ["浅白", "蜷缩", "浊响", "稍糊", "平坦", "硬滑", 0.593, 0.042, 0],
    ["青绿", "蜷缩", "沉闷", "稍糊", "稍凹", "硬滑", 0.719, 0.103, 0],
]

# Hold out every third row (indices 0, 3, 6, ...) as the test set and train
# on everything else.
data_test = [row for idx, row in enumerate(data) if idx % 3 == 0]
data_tr = [row for idx, row in enumerate(data) if idx % 3 != 0]

# Class sizes inside the training set. Any label other than 1 is counted as a
# bad melon.
gua = len(data_tr)
hao_gua = sum(1 for row in data_tr if row[8] == 1)
huai_gua = gua - hao_gua



# Per-class value counts for an attribute that takes three values.
def num_gua(list, shuxing, a, b, c,):
    """Count how often each of three attribute values occurs in each class.

    Args:
        list: Rows of the data set; the class label is read from index 8
            of every row (1 = good melon, 0 = bad melon).
        shuxing: Column index of the attribute to count.
        a, b, c: The three values the attribute can take.

    Returns:
        A 6-tuple ``(na1, nb1, nc1, na0, nb0, nc0)``: the counts of ``a``,
        ``b`` and ``c`` among good melons followed by the counts of ``a``,
        ``b`` and ``c`` among bad melons.
    """
    values = (a, b, c)
    good = [0, 0, 0]
    bad = [0, 0, 0]
    for row in list:
        value = row[shuxing]
        if value not in values:
            # Unknown values are skipped rather than being lumped into the
            # bad-class count of ``c``.
            continue
        slot = values.index(value)
        if row[8] == 1:
            good[slot] += 1
        elif row[8] == 0:
            bad[slot] += 1
    return tuple(good + bad)


# Per-class value counts for an attribute that takes two values.
def num_two(list, shuxing, a, b):
    """Count how often each of two attribute values occurs in each class.

    Args:
        list: Rows of the data set; the class label is read from index 8
            of every row (1 = good melon, 0 = bad melon).
        shuxing: Column index of the attribute to count.
        a, b: The two values the attribute can take.

    Returns:
        A 4-tuple ``(na1, nb1, na0, nb0)``: the counts of ``a`` and ``b``
        among good melons followed by the counts of ``a`` and ``b`` among
        bad melons.
    """
    values = (a, b)
    good = [0, 0]
    bad = [0, 0]
    for row in list:
        value = row[shuxing]
        if value not in values:
            # Unknown values are skipped instead of silently inflating the
            # bad-class count of ``b``.
            continue
        slot = values.index(value)
        if row[8] == 1:
            good[slot] += 1
        elif row[8] == 0:
            bad[slot] += 1
    return tuple(good + bad)

# Class-conditional frequencies P(value | class) from per-class counts.
def gailv(list, n_hao=None, n_huai=None):
    """Turn per-class value counts into class-conditional frequencies.

    Args:
        list: Counts laid out as the good-class counts followed by the same
            number of bad-class counts (the layout returned by ``num_gua``
            and ``num_two``).
        n_hao: Number of good melons to divide by. Defaults to the
            module-level ``hao_gua``.
        n_huai: Number of bad melons to divide by. Defaults to the
            module-level ``huai_gua``.

    Returns:
        A list of the same length as ``list`` in which the first half is
        divided by the good-class size and the second half by the bad-class
        size.

    Raises:
        ZeroDivisionError: If the class size used as a divisor is zero.
    """
    if n_hao is None:
        n_hao = hao_gua
    if n_huai is None:
        n_huai = huai_gua
    # Split at the midpoint instead of a fixed index so that both the
    # six-element (three values) and four-element (two values) layouts are
    # normalised by the correct class size.
    half = len(list) // 2
    good_part = [count / n_hao for count in list[:half]]
    bad_part = [count / n_huai for count in list[half:]]
    return good_part + bad_part


# Gaussian parameters of a sample.
def gaosi(list):
    """Return the mean and the population standard deviation of ``list``.

    The variance is divided by the number of values (not by ``n - 1``).
    An empty ``list`` raises ZeroDivisionError.
    """
    count = len(list)

    total = 0
    for value in list:
        total += value
    mean = total / count

    squared_deviation = 0
    for value in list:
        squared_deviation += (value - mean) ** 2

    variance = squared_deviation / count
    return mean, variance ** 0.5

# Discretised probability of a continuous value under a Gaussian.
def proba(u, satand, x):
    """Map ``x`` to a fixed weight according to its distance from ``u``.

    Args:
        u: Mean of the distribution.
        satand: Standard deviation of the distribution.
        x: Value to score.

    Returns:
        0.682689 when ``x`` lies within one standard deviation of ``u``
        (bounds included), 0.271711 when it lies outside that band but within
        two standard deviations (outer bounds included), and 0.0456 otherwise.
    """
    if u - satand <= x <= u + satand:
        return 0.682689

    in_lower_band = u - 2 * satand <= x < u - satand
    in_upper_band = u + satand <= x <= u + 2 * satand
    if in_lower_band or in_upper_band:
        return 0.271711

    return 0.0456


# Running product used to combine the per-attribute probabilities.
def product(list):
    """Return the product of all values in ``list`` (1 for an empty input)."""
    result = 1
    for factor in list:
        result = result * factor
    return result

# For every three-valued attribute: count the (value, class) occurrences in
# the training set with num_gua, then convert the counts to frequencies with
# gailv.

# Colour (column 0)
color = num_gua(data_tr, 0, "青绿", "乌黑", "浅白")
P_color = gailv(color)

# Curl (column 1)
square = num_gua(data_tr, 1, "蜷缩", "稍蜷", "硬挺")
P_quan = gailv(square)

# Sound (column 2)
voice = num_gua(data_tr, 2, "浊响", "沉闷", "清脆")
P_voice = gailv(voice)

# Pattern (column 3)
flower = num_gua(data_tr, 3, "清晰", "稍糊", "模糊")
P_flower = gailv(flower)

# Base of the melon (column 4)
base = num_gua(data_tr, 4, "凹陷", "稍凹", "平坦")
P_base = gailv(base)


# Feel (column 5, two values).
# num_two returns (hard&good, soft&good, hard&bad, soft&bad). The first two
# counts must be divided by the number of good melons and the last two by the
# number of bad melons; gailv does exactly that for a four-element input.
feel = num_two(data_tr, 5, "硬滑", "软粘")
P_feel = gailv(feel)



# Density (column 6): split the training values by class and fit a mean and
# standard deviation per class. Any label other than 1 counts as bad.
midu_hao = [row[6] for row in data_tr if row[8] == 1]
midu_huai = [row[6] for row in data_tr if row[8] != 1]

mi_u_hao, mi_satand_hao = gaosi(midu_hao)
mi_u_huai, mi_satand_huai = gaosi(midu_huai)

# Sugar content (column 7): same treatment as the density.
sugur_hao = [row[7] for row in data_tr if row[8] == 1]
sugur_huai = [row[7] for row in data_tr if row[8] != 1]

han_u_hao, han_satand_hao = gaosi(sugur_hao)
han_u_huai, han_satand_huai = gaosi(sugur_huai)

# Classify one sample with the statistics gathered from the training set.
def train(list):
    """Predict whether a single melon sample is good or bad.

    Despite its name this function does not fit anything: it scores one
    sample against the module-level probability tables and Gaussian
    parameters computed from the training set.

    Each class score is the class prior (class size / training-set size)
    multiplied by the per-attribute probabilities of the sample's values.

    Args:
        list: One data row; indices 0-5 hold the categorical attributes,
            index 6 the density and index 7 the sugar content.

    Returns:
        1 when the good-melon score is at least as large as the bad-melon
        score, otherwise 0.
    """
    # (column, first two known values, probability table). A table holds the
    # three good-class probabilities followed by the three bad-class ones; any
    # value that is not one of the two listed falls back to the third slot.
    three_valued = (
        (0, ("青绿", "乌黑"), P_color),
        (1, ("蜷缩", "稍蜷"), P_quan),
        (2, ("浊响", "沉闷"), P_voice),
        (3, ("清晰", "稍糊"), P_flower),
        (4, ("凹陷", "稍凹"), P_base),
    )

    # Start from the class priors so the comparison is between posteriors
    # (up to a common factor) rather than between bare likelihoods.
    hao = hao_gua / gua
    huai = huai_gua / gua

    for column, known, table in three_valued:
        value = list[column]
        slot = known.index(value) if value in known else 2
        hao *= table[slot]
        huai *= table[slot + 3]

    # Feel has two values: P_feel is (hard|good, soft|good, hard|bad, soft|bad).
    slot = 0 if list[5] == "硬滑" else 1
    hao *= P_feel[slot]
    huai *= P_feel[slot + 2]

    # Continuous attributes: density, then sugar content.
    hao *= proba(mi_u_hao, mi_satand_hao, list[6])
    huai *= proba(mi_u_huai, mi_satand_huai, list[6])

    hao *= proba(han_u_hao, han_satand_hao, list[7])
    huai *= proba(han_u_huai, han_satand_huai, list[7])

    # Ties go to the good class.
    return 1 if hao >= huai else 0

def xunlian(list):
    """Classify ``list`` with ``train`` and print the row with its verdict."""
    verdict = "    好瓜" if train(list) == 1 else "    坏瓜"
    print(list, verdict)

# Classify and print every held-out sample.
for sample in data_test:
    xunlian(sample)










  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
朴素贝叶斯分类器(Naive Bayes classifier)是一种常用的概率分类方法,它基于贝叶斯理论和特征独立假设。朴素贝叶斯分类器有着简单高效的特点,在文本分类、垃圾邮件过滤、情感分析等领域都有广泛应用。 朴素贝叶斯分类器的基本原理是利用训练集的特征和对应的分类标签构建生成模型,然后根据测试样本的特征,通过计算后验概率来进行分类预测。具体而言,朴素贝叶斯分类器假设特征之间相互独立,基于此假设,可以通过训练集中特征在各个类别下的条件概率来计算样本在不同类别下的后验概率,并选择后验概率最大的类别作为分类结果。 朴素贝叶斯分类器的训练过程包括两个步骤:首先是计算各个类别的先验概率,即每个类别在训练集中的出现频率;然后是计算每个特征在各个类别下的条件概率,即给定一个类别时,特征的条件概率。在得到先验概率和条件概率后,可以通过贝叶斯公式计算后验概率。 朴素贝叶斯分类器的优点在于对小规模数据集具有较好的分类性能,且能够处理多类别分类问题。而其缺点则是对于特征之间的相关性较为敏感,当特征之间存在强相关性时,朴素贝叶斯分类器的性能会下降。 总的来说,朴素贝叶斯分类器是一种简单而有效的分类方法,它在许多实际应用中表现出色。其理论基础扎实,实现相对简单,适用于处理小规模数据集的分类问题。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值