机器学习与数据挖掘-实验六

实现拉普拉斯修正的朴素贝叶斯分类器

编码实现拉普拉斯修正的朴素贝叶斯分类器,基于给定的训练数据,对测试样本进行判别。

import numpy as np

def loaddata():
    """Return the hard-coded toy training set as ``(X, y)``.

    ``X`` has 15 rows and two categorical features: the first takes the
    values 1/2/3 and the second 'S'/'M'/'L'.  Because each row mixes an
    integer with a string, NumPy stores every entry as a string.
    ``y`` holds the matching class labels, each either 1 or -1.
    """
    # Spell the two feature columns out separately, then pair them row-wise.
    first_feature = [1] * 5 + [2] * 5 + [3] * 5
    second_feature = list('SMMSSSMMLLLMMLL')
    samples = [[a, b] for a, b in zip(first_feature, second_feature)]

    targets = [-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1]
    return np.array(samples), np.array(targets)

# 训练、计算各个概率值
def Train(trainset, train_labels):
    """Fit a naive Bayes classifier with Laplace (add-one) smoothing.

    Args:
        trainset: 2-D array-like of shape (m, n) holding categorical
            feature values, one sample per row.
        train_labels: 1-D array-like of length m with the class labels.

    Returns:
        A tuple ``(prior_probability, conditional_probability, labels)``:

        * ``prior_probability`` maps each class label to
          ``(|D_c| + 1) / (m + N)``, where N is the number of classes.
        * ``conditional_probability`` maps the string key
          ``"<label>,<feature index>,<feature value>"`` to
          ``(|D_c,x| + 1) / (|D_c| + N_j)``, where N_j is the number of
          distinct values of feature j in the training set.  Every
          (label, feature, value) combination gets an entry, including
          combinations that never occur together in the training data.
        * ``labels`` is the set of distinct class labels.

    Raises:
        ValueError: if ``trainset`` is not 2-D or its row count differs
            from the number of labels.

    The smoothed class counts and the raw joint counts are printed as a
    training trace.
    """
    trainset = np.asarray(trainset)
    train_labels = np.asarray(train_labels)
    if trainset.ndim != 2:
        raise ValueError('trainset must be a 2-D array of shape (m, n)')

    # Number of samples and number of features.
    m, n = trainset.shape
    if len(train_labels) != m:
        raise ValueError('trainset and train_labels must have the same length')

    # Distinct class labels.
    labels = set(train_labels)

    # Raw counts: samples per class, and samples per (class, feature, value).
    class_counts = {label: 0 for label in labels}
    joint_counts = {}
    for label, row in zip(train_labels, trainset):
        class_counts[label] += 1
        for j, value in enumerate(row):
            key = ','.join((str(label), str(j), str(value)))
            joint_counts[key] = joint_counts.get(key, 0) + 1

    print('prior_probabilit =',
          {label: count + 1 for label, count in class_counts.items()})
    print('conditional_probability = ', joint_counts)

    # Smoothed conditionals.  The denominator uses the RAW class count plus
    # the number of distinct values of the feature, so that the estimates for
    # one class and one feature sum to 1.  Iterating over every possible
    # value (not only the observed keys) gives zero-count combinations their
    # smoothed probability instead of leaving them out of the table.
    conditional_probability_final = {}
    for j in range(n):
        feature_values = set(trainset[:, j])
        n_values = len(feature_values)
        for label in labels:
            denominator = class_counts[label] + n_values
            for value in feature_values:
                key = ','.join((str(label), str(j), str(value)))
                conditional_probability_final[key] = (
                    (joint_counts.get(key, 0) + 1) / denominator
                )

    # Smoothed priors.
    n_classes = len(labels)
    prior_probability = {
        label: (count + 1) / (m + n_classes)
        for label, count in class_counts.items()
    }

    return prior_probability, conditional_probability_final, labels

# 定义预测函数
def predict(data):
    """Return the class label with the highest naive Bayes score for `data`.

    `data` is a sequence of feature values in the same order as the training
    features.  The function reads the module-level `train_labels_set`,
    `conditional_probability` and `prior_probability`.  For every class it
    multiplies the conditional probabilities of the given feature values and
    then the class prior; the per-class scores are printed before the best
    label is returned.  A (class, feature, value) key missing from
    `conditional_probability` raises `KeyError`.
    """
    scores = {}
    for cls in train_labels_set:
        # Product of the per-feature conditional probabilities.
        likelihood = 1.0
        for position, value in enumerate(data):
            lookup = ','.join((str(cls), str(position), str(value)))
            likelihood = likelihood * conditional_probability[lookup]
        # Weight by the class prior.
        scores[cls] = likelihood * prior_probability[cls]
    print('result =', scores)

    # Rank classes from highest to lowest score and take the winner.
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    best_label, _ = ranked[0]
    return best_label

# Load the toy data set and fit the classifier.  The three fitted objects
# are kept as module-level names because predict() reads them as globals.
X, y = loaddata()
fitted = Train(X, y)
prior_probability, conditional_probability, train_labels_set = fitted
print('conditional_probability = ', conditional_probability)

# Classify a single test sample and report the predicted label.
sample = [2, 'S']
r_label = predict(sample)
print(' r_label =', r_label)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值