import math


def create_tree(data, attributes, max_depth=float('inf'), min_samples=1, depth=0):
    # Recursively build an ID3-style decision tree with simple pre-pruning.
    if len(data) == 0:
        return None
    # Every sample carries the same label: return that label as a leaf.
    if all_same(data):
        return data[0][-1]
    # No attributes left to split on, or the depth limit is reached.
    if len(attributes) == 0 or depth >= max_depth:
        return majority_class(data)

    best_attribute = choose_best_attribute(data, attributes)
    tree = {best_attribute: {}}
    attribute_values = get_attribute_values(data, best_attribute)
    for value in attribute_values:
        sub_data = get_sub_data(data, best_attribute, value)
        if len(sub_data) < min_samples:
            # Pre-pruning: too few samples to keep splitting.
            tree[best_attribute][value] = majority_class(sub_data)
        else:
            sub_attributes = attributes[:]
            sub_attributes.remove(best_attribute)
            tree[best_attribute][value] = create_tree(sub_data, sub_attributes, max_depth, min_samples, depth + 1)

    return tree
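
# The two keyword arguments are simple pre-pruning knobs: max_depth caps the
# recursion depth, and min_samples turns any branch with fewer samples into a
# majority-label leaf. A hypothetical comparison (left commented out because
# data1 and attributes are only defined further down in this script):
# full_tree   = create_tree(data1, attributes)                       # no pruning
# pruned_tree = create_tree(data1, attributes, max_depth=2, min_samples=3)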


def all_same(data):
    label = data[0][-1]
    for sample in data:
        if sample[-1] != label:
            return False
    return True


def majority_class(data):
    label_count = {}
    for sample in data:
        label = sample[-1]
        if label in label_count:
            label_count[label] += 1
        else:
            label_count[label] = 1
    return max(label_count, key=label_count.get)


def choose_best_attribute(data, attributes):
    # Start below any possible gain so a valid attribute is always returned,
    # even when every candidate has zero information gain.
    best_gain = -1.0
    best_attribute = None
    for attribute in attributes:
        gain = calculate_gain(data, attribute)
        if gain > best_gain:
            best_gain = gain
            best_attribute = attribute
    return best_attribute


def calculate_gain(data, attribute):
    # Information gain: Gain(D, a) = Ent(D) - sum_v |D_v|/|D| * Ent(D_v)
    gain = calculate_entropy(data)
    attribute_values = get_attribute_values(data, attribute)
    for value in attribute_values:
        sub_data = get_sub_data(data, attribute, value)
        prob = len(sub_data) / len(data)
        gain -= prob * calculate_entropy(sub_data)
    return gain


def calculate_entropy(data):
    # Shannon entropy: Ent(D) = -sum_k p_k * log2(p_k) over the class labels.
    label_count = {}
    for sample in data:
        label = sample[-1]
        if label in label_count:
            label_count[label] += 1
        else:
            label_count[label] = 1

    entropy = 0
    for count in label_count.values():
        prob = count / len(data)
        entropy -= prob * math.log(prob, 2)
    return entropy
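
# Worked check (data1 is defined further down): data1 holds 5 '好瓜' and 6 '坏瓜'
# samples, so Ent(D) = -(5/11)*log2(5/11) - (6/11)*log2(6/11), about 0.994 bits,
# which is what calculate_entropy(data1) should return.
# print(round(calculate_entropy(data1), 3))   # expected: roughly 0.994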


def get_attribute_values(data, attribute):
    values = []
    for sample in data:
        value = sample[attribute]
        if value not in values:
            values.append(value)
    return values


def get_sub_data(data, attribute, value):
    sub_data = []
    for sample in data:
        if sample[attribute] == value:
            sub_data.append(sample)
    return sub_data


def classify(tree, sample):
    # Leaves are stored as plain label strings; internal nodes are nested dicts.
    if isinstance(tree, str):
        return tree
    root = list(tree.keys())[0]
    value = sample[root]
    subtree = tree[root][value]
    return classify(subtree, sample)
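
# classify() raises a KeyError if a sample reaches a node whose branches do not
# include the sample's attribute value (possible when the test data contains
# value combinations unseen during training). A minimal defensive variant, added
# here for illustration and not part of the original script, with a
# caller-chosen fallback label:
def classify_safe(tree, sample, fallback):
    if isinstance(tree, str):
        return tree
    root = list(tree.keys())[0]
    subtree = tree[root].get(sample[root])
    if subtree is None:
        return fallback
    return classify_safe(subtree, sample, fallback)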


def accuracy(tree, data):
    correct_count = 0
    for sample in data:
        if classify(tree, sample) == sample[-1]:
            correct_count += 1
    return correct_count / len(data)


# Training set: a small watermelon-style toy dataset. Each row holds six
# categorical attribute values followed by the class label
# ('好瓜' = good melon, '坏瓜' = bad melon).
data1 = [
    ['青绿', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '好瓜'],
    ['乌黑', '蜷缩', '沉闷', '清晰', '凹陷', '硬滑', '好瓜'],
    ['乌黑', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '好瓜'],
    ['青绿', '稍蜷', '浊响', '清晰', '稍凹', '软粘', '好瓜'],
    ['乌黑', '稍蜷', '浊响', '稍糊', '稍凹', '软粘', '好瓜'],
    ['乌黑', '稍蜷', '沉闷', '稍糊', '稍凹', '硬滑', '坏瓜'],
    ['青绿', '硬挺', '清脆', '清晰', '平坦', '软粘', '坏瓜'],
    ['浅白', '稍蜷', '沉闷', '稍糊', '凹陷', '硬滑', '坏瓜'],
    ['乌黑', '稍蜷', '浊响', '清晰', '稍凹', '软粘', '坏瓜'],
    ['浅白', '蜷缩', '浊响', '模糊', '平坦', '硬滑', '坏瓜'],
    ['青绿', '蜷缩', '沉闷', '稍糊', '稍凹', '硬滑', '坏瓜']
]


# Test set, same column layout as data1.
data2 = [
    ['青绿', '蜷缩', '沉闷', '清晰', '凹陷', '硬滑', '好瓜'],
    ['浅白', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '好瓜'],
    ['乌黑', '稍蜷', '浊响', '清晰', '稍凹', '硬滑', '好瓜'],
    ['乌黑', '稍蜷', '沉闷', '稍糊', '稍凹', '硬滑', '坏瓜'],
    ['浅白', '硬挺', '清脆', '模糊', '平坦', '硬滑', '坏瓜'],
    ['浅白', '蜷缩', '浊响', '模糊', '平坦', '软粘', '坏瓜'],
    ['青绿', '稍蜷', '浊响', '稍糊', '凹陷', '硬滑', '坏瓜']
]


# Attributes are referred to by column index; the class label is the last column.
attributes = [0, 1, 2, 3, 4, 5]

# Pre-pruning parameters.
max_depth = 4
min_samples = 3

tree = create_tree(data1, attributes, max_depth, min_samples)

print("Decision tree:", tree)
print("Classification results:")
for sample in data2:
    result = classify(tree, sample)
    print(sample, "->", result)
print("Classification accuracy:", accuracy(tree, data2))
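
# Optional sanity checks added for illustration (not part of the original
# script): training-set accuracy is usually higher than the test accuracy
# printed above, and the per-attribute information gains show why the root
# attribute was chosen.
print("Training accuracy:", accuracy(tree, data1))
for a in attributes:
    print("Information gain of attribute", a, ":", round(calculate_gain(data1, a), 3))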
