最近在看决策树,在B站上看到了一个前辈的讲课视频
讲的非常详细,于是自己手动实现了一下基于ID3的决策树
说来惭愧,我是新手,所以并没有导包,纯原始python写的。也并没有画出最后决策树的构建图。
只是让我对这个决策树更加了解一些,后续学到引入外部包,再说。
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import math
from collections import Counter

import numpy as np
# Training set used to build the decision tree (the classic "play tennis" data).
# Row layout: [Outlook, Temperature, Humidity, Wind, decision label ('Yes'/'No')].
data = [
    ['Sunny', 'Hot', 'High', 'Weak', 'No'],
    ['Sunny', 'Hot', 'High', 'Strong', 'No'],
    ['Overcast', 'Hot', 'High', 'Weak', 'Yes'],
    ['Rain', 'Mild', 'High', 'Weak', 'Yes'],
    ['Rain', 'Cool', 'Normal', 'Weak', 'Yes'],
    ['Rain', 'Cool', 'Normal', 'Strong', 'No'],
    ['Overcast', 'Cool', 'Normal', 'Strong', 'Yes'],
    ['Sunny', 'Mild', 'High', 'Weak', 'No'],
    ['Sunny', 'Cool', 'Normal', 'Weak', 'Yes'],
    ['Rain', 'Mild', 'Normal', 'Weak', 'Yes'],
    ['Sunny', 'Mild', 'Normal', 'Strong', 'Yes'],
    ['Overcast', 'Mild', 'High', 'Strong', 'Yes'],
    ['Overcast', 'Hot', 'Normal', 'Weak', 'Yes'],
    ['Rain', 'Mild', 'High', 'Strong', 'No']
]
# Condition attribute names, in the column order used by `data` rows.
columns = ['Outlook', 'Temperature', 'Humidity', 'Wind']
# Maps each condition attribute name to its column index in a `data` row.
columns_index = {
    'Outlook': 0,
    'Temperature': 1,
    'Humidity': 2,
    'Wind': 3,
}
# Step 1: entropy of the decision attribute, restricted by `path`.
def calculate_entropy(path):
    """Filter the global training ``data`` by the constraints in ``path`` and
    compute the entropy of the decision column over the surviving rows.

    path: dict mapping column name -> required attribute value; an empty dict
          keeps every row (used for the root of the tree).
    Returns (entropy, filtered_rows); entropy is 0 when no row matches.
    """
    # Keep only the rows that satisfy every attribute=value constraint.
    filtered_data = [
        line for line in data
        if all(line[columns_index[column]] == value
               for column, value in path.items())
    ]
    decision_entropy = 0.0
    if filtered_data:
        # Frequency of each decision label ('Yes'/'No') among matching rows.
        label_counts = Counter(line[-1] for line in filtered_data)
        total = len(filtered_data)
        for count in label_counts.values():
            probability = count / total
            decision_entropy -= probability * math.log(probability, 2)
    return decision_entropy, filtered_data
# Step 2: conditional entropy of each candidate condition attribute.
# The four condition attributes are Outlook, Temperature, Humidity and Wind;
# compute each attribute's information gain and choose the best splitter.
# E.g. Outlook splits the data into three groups:
# Sunny (D1), Overcast (D2), Rain (D3).
def child_node(parent_score, nodes, node_data, path):
    """Pick the column with the highest information gain for the next split.

    parent_score: entropy of the decision column before splitting.
    nodes: candidate column names to evaluate (the global `columns` list).
    node_data: rows that reached this point of the tree.
    path: dict of column -> value constraints already fixed above this node.
    Returns (chosen column name, {attribute value -> per-label count dict}).
    """
    # Group node_data by every candidate column:
    # node_dict[column][value] = {label: count, ..., 'count': rows with value}.
    node_dict = {}
    for line in node_data:
        for node in nodes:
            attributes = node_dict.get(node)
            if attributes is None:
                attributes = {}
            attribute = attributes.get(line[columns_index[node]])
            if attribute is None:
                attribute = {}
            num = attribute.get(line[-1])
            if num is None:
                num = 1
            else:
                num += 1
            attribute[line[-1]] = num
            # 'count' tracks how many rows carry this attribute value overall.
            attribute['count'] = 1 if attribute.get('count') is None else attribute.get('count') + 1
            attributes[line[columns_index[node]]] = attribute
            node_dict[node] = attributes
    # Fallback choice: first grouped column (used when no split improves gain).
    root = next(iter(node_dict))
    increment = 0
    for node in node_dict:
        node_score = []
        weight = []
        for attribute in node_dict[node]:
            # Entropy of the decision column conditioned on this value.
            current_path = path.copy()
            current_path[node] = attribute
            decision_entropy, filtered_data = calculate_entropy(current_path)
            node_score.append(decision_entropy)
            # Weight = fraction of rows carrying this attribute value.
            weight.append((node_dict[node][attribute]['count'] / len(node_data)))
        # Weighted conditional entropy of splitting on `node`.
        node_score = sum(np.multiply(node_score, weight))
        # Keep the column with the largest information gain so far.
        if parent_score - node_score > increment:
            increment = parent_score - node_score
            root = node
    return root, node_dict[root]
def find_attribute(root, attributes, path, tree_node):
    """Recursively grow the decision tree below the chosen column `root`.

    root: column name selected for this level.
    attributes: dict of value -> per-label counts for that column
                (second return value of `child_node`).
    path: dict of column -> value constraints accumulated so far; mutated
          while descending and restored before returning.
    tree_node: Node for the chosen column; one child Node is appended per
               attribute value.
    """
    for attribute in attributes:
        path[root] = attribute
        # Entropy of the decision column restricted to this branch.
        entropy, node_data = calculate_entropy(path)
        attribute_node = Node(attribute, tree_node, [])
        tree_node.next.append(attribute_node)
        if entropy == 0.0:
            # Pure branch: attach a leaf carrying the decision label.
            # Bug fix: the parent pointer used to be the *builtin* `next`
            # instead of the branch node.
            attribute_node.next.append(Node(node_data[-1][-1], attribute_node, None))
        elif len(node_data) > 0:
            # Impure branch: choose the next best column and recurse.
            node, sub_attributes = child_node(entropy, columns, node_data, path)
            temp_node = Node(node, attribute_node, [])  # bug fix: parent was builtin `next`
            attribute_node.next.append(temp_node)
            find_attribute(node, sub_attributes, path, temp_node)
    # Undo this level's constraint so sibling branches see a clean path.
    path.pop(root)
class Node:
    """One vertex of the decision tree.

    name: a column name (split node), an attribute value (branch node),
          or a decision label (leaf).
    before: reference to the parent node (None for the tree head).
    next: list of child nodes; None marks a leaf.
    """

    def __init__(self, name, before, next):
        self.name = name
        self.before = before
        self.next = next
# Once the tree is built, walk it to classify one sample.
def predict(root, line):
    """Descend the tree and print the decision for `line`.

    root: current Node (column node on entry, then value/leaf nodes below).
    line: dict mapping column name -> attribute value for the sample.
    Prints 'result <label>' when a leaf is reached; silent when the sample's
    value has no matching branch.
    """
    if root.next is None:
        # Leaf node: report the decision and stop.
        # Bug fix: without this return the leaf's label was looked up in
        # `line` as if it were a column name (harmless only by accident).
        print('result', root.name)
        return
    column = root.name
    attribute = line.get(column)
    if attribute is not None:
        for branch in root.next:  # renamed from `next` (shadowed the builtin)
            if branch.name == attribute:
                for child in branch.next:
                    predict(child, line)
if __name__ == '__main__':
    # Build the tree: pick the best root column over the full training set.
    # A path like {'Outlook': 'Sunny', 'Temperature': 'Hot'} constrains rows;
    # the empty dict keeps everything.
    constraints = {}
    root_entropy, matching_rows = calculate_entropy(constraints)
    best_column, best_attributes = child_node(
        root_entropy, columns, matching_rows, constraints)
    head = Node(best_column, None, [])
    # Recursively expand every branch below the root column.
    find_attribute(best_column, best_attributes, {}, head)
    # Classify a couple of unseen samples.
    test_data = [
        {
            'Temperature': 'Hot',
            'Humidity': 'High',
            'Wind': 'Weak',
            'Outlook': 'Sunny'
        },
        {
            'Outlook': 'Overcast',
            'Temperature': 'Hot',
            'Humidity': 'High',
            'Wind': 'Weak'
        },
    ]
    for sample in test_data:
        print(sample)
        predict(head, sample)