朴素贝叶斯 - Python 实现

参考文章

#!/usr/bin/env python 
# -*- coding:utf-8 -*-
# 原文章 https://blog.csdn.net/qq_38178543/article/details/107568803
# 数据集

# Column names, in the same order as the values inside every row of `data`.
columns = ['outlook', 'Temperature', 'Humidity', 'Wind', 'PlayTennis']

# Play-tennis toy dataset: 14 rows, four categorical features followed by
# the PlayTennis label ('Yes' / 'No') as the last element of each row.
data = [
    ['Sunny', 'Hot', 'High', 'Weak', 'No'],
    ['Sunny', 'Hot', 'High', 'Strong', 'No'],
    ['Overcast', 'Hot', 'High', 'Weak', 'Yes'],
    ['Rain', 'Mild', 'High', 'Weak', 'Yes'],
    ['Rain', 'Cool', 'Normal', 'Weak', 'Yes'],
    ['Rain', 'Cool', 'Normal', 'Strong', 'No'],
    ['Overcast', 'Cool', 'Normal', 'Strong', 'Yes'],
    ['Sunny', 'Mild', 'High', 'Weak', 'No'],
    ['Sunny', 'Cool', 'Normal', 'Weak', 'Yes'],
    ['Rain', 'Mild', 'Normal', 'Weak', 'Yes'],
    ['Sunny', 'Mild', 'Normal', 'Strong', 'Yes'],
    ['Overcast', 'Mild', 'High', 'Strong', 'Yes'],
    ['Overcast', 'Hot', 'Normal', 'Weak', 'Yes'],
    ['Rain', 'Mild', 'High', 'Strong', 'No'],
]


# 计算标签个数 {'No': 4, 'Yes': 6}
def calculate_result(train_data):
    """Count how many rows carry each class label.

    The label is taken to be the last element of every row.

    Args:
        train_data: iterable of rows (sequences) whose final item is the label.

    Returns:
        dict mapping each label to the number of rows that end with it.
    """
    label_counts = {}
    for row in train_data:
        label = row[-1]
        # A label seen for the first time starts from zero.
        label_counts[label] = label_counts.get(label, 0) + 1
    return label_counts


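# Count, for every attribute value, how often it occurs with each label;
# each entry also records the column position of the value under 'index', e.g.
# 'Sunny': {'No': 3, 'Yes': 1, 'index': 0}, 'Hot': {'No': 2, 'Yes': 1, 'index': 1}...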
def calculate_attribute(train_data):
    """Count how often each attribute value co-occurs with each label.

    The last element of every row is the label; all preceding elements are
    feature values. The rows are read only and are never modified.

    Args:
        train_data: iterable of rows (sequences) of the form
            [feature_0, ..., feature_k, label].

    Returns:
        dict mapping each feature value to a dict that holds
        ``'index'`` (the column position where the value was first seen)
        plus one ``label -> count`` entry per label it occurred with.

    Raises:
        IndexError: if a row is empty.

    Note:
        Entries are keyed by the value alone, so an identical value that
        appears in two different columns shares one entry (and keeps the
        first column's index).
    """
    attribute_dict = {}
    for line in train_data:
        # Slice rather than pop() so the caller's rows keep their labels.
        label = line[-1]
        features = line[:-1]
        for index, value in enumerate(features):
            counts = attribute_dict.setdefault(value, {'index': index})
            counts[label] = counts.get(label, 0) + 1

    return attribute_dict


def calculate_prior(train_data, result_num, attribute_dict):
    """Convert raw counts into Laplace (add-one) smoothed probabilities.

    Neither ``result_num`` nor ``attribute_dict`` is modified; fresh dicts
    are returned, so the function can safely be called again on the same
    counts.

    Args:
        train_data: the training rows; only ``len(train_data)`` is used.
        result_num: dict mapping each label to its row count.
        attribute_dict: dict mapping each attribute value to a dict with an
            ``'index'`` entry (column position) and ``label -> count``
            entries.

    Returns:
        A ``(priors, likelihoods)`` tuple:
        priors maps label -> (count + 1) / (rows + number of labels);
        likelihoods maps attribute value -> dict that keeps ``'index'`` and
        maps every label to
        (co-occurrence count + 1) / (label count + distinct values in that
        column).
    """
    # Collect the distinct values observed in each column; the size of each
    # set is the smoothing term in the likelihood denominator.
    column_values = {}
    for attribute, counts in attribute_dict.items():
        column_values.setdefault(counts['index'], set()).add(attribute)

    likelihoods = {}
    for attribute, counts in attribute_dict.items():
        distinct = len(column_values[counts['index']])
        smoothed = dict(counts)  # copy keeps 'index' alongside the labels
        for label, label_total in result_num.items():
            # A label never seen with this value counts as zero occurrences.
            smoothed[label] = (counts.get(label, 0) + 1) / (label_total + distinct)
        likelihoods[attribute] = smoothed

    denominator = len(train_data) + len(result_num)
    priors = {
        label: (count + 1) / denominator
        for label, count in result_num.items()
    }

    return priors, likelihoods


def bayes_test(data, attribute_dict, result_num):
    """Classify each row with the trained model and print the outcome.

    For every row the last element is treated as the (ignored) true label
    and the remaining elements as feature values. The score of a label is
    its prior multiplied by the likelihood of every feature value; the
    label with the highest score wins. The rows themselves are not
    modified.

    Args:
        data: iterable of rows of the form [feature_0, ..., feature_k, label].
        attribute_dict: dict mapping attribute value -> {label: likelihood}.
        result_num: dict mapping label -> prior probability.

    Returns:
        list with the predicted label for each row, in input order. A row
        whose scores are all non-positive yields ``''``.

    Raises:
        KeyError: if a feature value (or a label for that value) is missing
            from ``attribute_dict``, e.g. a value never seen in training.
    """
    predictions = []
    for line in data:
        # Slice rather than pop() so the caller's rows keep their labels.
        features = line[:-1]
        print('预测:', features)
        max_value = 0
        choice = ''
        for result in result_num:
            print(result, end=': ')
            p = result_num[result]
            for attribute in features:
                p *= attribute_dict[attribute][result]
            print(p, end=' ')
            if p > max_value:
                max_value = p
                choice = result
        print('预测结果 : ', choice, 'score', max_value)
        predictions.append(choice)

    return predictions


if __name__ == '__main__':
    # The first 10 rows train the model; the remaining rows are classified.
    split_at = 10
    training_rows, test_rows = data[:split_at], data[split_at:]

    label_counts = calculate_result(training_rows)
    value_counts = calculate_attribute(training_rows)
    priors, likelihoods = calculate_prior(training_rows, label_counts, value_counts)

    bayes_test(test_rows, likelihoods, priors)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值