朴素贝叶斯-python实现

最新推荐文章于 2024-04-24 01:16:17 发布

她家的猫

最新推荐文章于 2024-04-24 01:16:17 发布

阅读量222

点赞数

分类专栏：没人看系列 ｜ 文章标签：python 人工智能 算法

本文链接：https://blog.csdn.net/qq_40948862/article/details/120372649

版权

没人看系列专栏收录该内容

59 篇文章 4 订阅

订阅专栏

参考文章

#!/usr/bin/env python 
# -*- coding:utf-8 -*-
# 原文章 https://blog.csdn.net/qq_38178543/article/details/107568803
# Data set: weather observations, one row per observation.
# Each row lists its values in the order given by `columns`; the last
# entry ('PlayTennis') is the class label.

columns = ['outlook', 'Temperature', 'Humidity', 'Wind', 'PlayTennis']

# Rows are written as whitespace-separated strings and split into fresh
# lists, so every row is an independent, mutable list of five strings.
data = [
    row.split()
    for row in (
        "Sunny Hot High Weak No",
        "Sunny Hot High Strong No",
        "Overcast Hot High Weak Yes",
        "Rain Mild High Weak Yes",
        "Rain Cool Normal Weak Yes",
        "Rain Cool Normal Strong No",
        "Overcast Cool Normal Strong Yes",
        "Sunny Mild High Weak No",
        "Sunny Cool Normal Weak Yes",
        "Rain Mild Normal Weak Yes",
        "Sunny Mild Normal Strong Yes",
        "Overcast Mild High Strong Yes",
        "Overcast Hot Normal Weak Yes",
        "Rain Mild High Strong No",
    )
]


# Count how many rows carry each class label,
# e.g. {'No': 4, 'Yes': 6} for the first ten rows of `data`.
def calculate_result(train_data):
    """Return a dict mapping each class label to its number of rows.

    The label of a row is its last element. Labels appear in the result
    in the order they are first encountered.
    """
    label_counts = {}
    for row in train_data:
        label = row[-1]
        label_counts[label] = label_counts.get(label, 0) + 1
    return label_counts


# Count, for every attribute value, how often it occurs with each label,
# e.g. 'Sunny': {'No': 3, 'index': 0, 'Yes': 1}, 'Hot': {'No': 2, 'index': 1, ...}
def calculate_attribute(train_data):
    """Return per-attribute-value label counts for the training rows.

    Each row is a sequence whose last element is the class label and whose
    preceding elements are attribute values. The result maps every
    attribute value to a dict holding:

    * one entry per label seen with that value (label -> occurrence count);
    * the key ``'index'`` -> position of the column in which the value was
      first seen.

    The rows are only read, never modified, so the same data can safely be
    passed to other functions (or to this one again) afterwards.

    Limitations: entries are keyed by the attribute value alone, so an
    identical string appearing in two different columns shares one entry
    (keeping the first column's index), and a class label literally named
    ``'index'`` would collide with the bookkeeping key.

    Raises:
        IndexError: if a row is empty.
    """
    attribute_dict = {}
    for line in train_data:
        # Read the label and slice off the features instead of pop()-ing,
        # which would strip the label from the caller's rows.
        result = line[-1]
        for index, value in enumerate(line[:-1]):
            counts = attribute_dict.get(value)
            if counts is None:
                attribute_dict[value] = {result: 1, 'index': index}
            else:
                counts[result] = counts.get(result, 0) + 1

    return attribute_dict


def calculate_prior(train_data, result_num, attribute_dict):
    """Turn raw counts into add-one (Laplace) smoothed probabilities, in place.

    Args:
        train_data: the training rows; only their number is used.
        result_num: label -> row count. Overwritten with
            ``(count + 1) / (len(train_data) + number_of_labels)``.
        attribute_dict: attribute value -> {label: count, 'index': column}.
            For every label in ``result_num`` the count (0 if absent) is
            overwritten with
            ``(count + 1) / (label_count + distinct_values_in_column)``.

    Returns:
        The same two (mutated) objects as ``(result_num, attribute_dict)``.

    Side effect: prints the mapping of column index -> set of attribute
    values found in that column.
    """
    # Group attribute values by column so each column's number of distinct
    # values is available for the smoothing denominator.
    values_by_column = {}
    for value, entry in attribute_dict.items():
        values_by_column.setdefault(entry['index'], set()).add(value)
    print(values_by_column)

    # Conditional probabilities. This must run while result_num still holds
    # raw counts, i.e. before the prior loop below overwrites them.
    for entry in attribute_dict.values():
        for label in result_num:
            seen = entry.get(label) or 0
            entry[label] = (seen + 1) / (result_num[label] + len(
                values_by_column[entry['index']])) * 1.0

    # Class priors.
    smoothed_total = len(train_data) + len(result_num)
    for label in result_num:
        result_num[label] = (result_num[label] + 1) / smoothed_total * 1.0

    return result_num, attribute_dict


def bayes_test(data, attribute_dict, result_num):
    """Classify each row, print the scores, and return the predicted labels.

    Args:
        data: rows whose last element is the (ignored) true label and whose
            preceding elements are attribute values.
        attribute_dict: attribute value -> {label: P(value | label), ...},
            as produced by ``calculate_prior``.
        result_num: label -> prior probability P(label).

    Returns:
        A list with one predicted label per input row, in input order. A
        row's entry is ``''`` if no label scored above zero.

    Raises:
        KeyError: if a row contains an attribute value that is missing
            from ``attribute_dict``.

    The rows are only read; the trailing label is skipped with a slice
    rather than pop(), so the caller's data keeps its labels.
    """
    predictions = []
    for line in data:
        features = line[:-1]
        print('预测:', features)
        max_value = 0
        choice = ''
        for result, prior in result_num.items():
            print(result, end=': ')
            # Naive Bayes score: prior times the product of the
            # per-attribute conditional probabilities.
            p = prior
            for attribute in features:
                p *= attribute_dict[attribute][result]
            print(p, end=' ')
            if p > max_value:
                max_value = p
                choice = result
        print('预测结果 : ', choice, 'score', max_value)
        predictions.append(choice)

    return predictions


if __name__ == '__main__':
    # Train on the first ten rows and classify the remaining ones.
    split_at = 10
    train_data, test_data = data[:split_at], data[split_at:]

    # Order matters: the label counts are taken before the attribute
    # counts, and both are raw counts until calculate_prior converts them
    # to probabilities.
    result_num = calculate_result(train_data)
    attribute_dict = calculate_attribute(train_data)
    result_num, attribute_dict = calculate_prior(
        train_data, result_num, attribute_dict)

    bayes_test(test_data, attribute_dict, result_num)

她家的猫

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
朴素贝叶斯-python实现

参考文章#!/usr/bin/env python # -*- coding:utf-8 -*-# 原文章 https://blog.csdn.net/qq_38178543/article/details/107568803# 数据集columns = ['outlook', 'Temperature', 'Humidity', 'Wind', 'PlayTennis']data = [["Sunny", "Hot", "High", "Weak", "No"], ["Su
复制链接

扫一扫