哈哈哈哈哈哈哈哈

最新推荐文章于 2024-10-18 00:00:00 发布

白羊杆菌

最新推荐文章于 2024-10-18 00:00:00 发布

阅读量58

点赞数

文章标签： python

本文链接：https://blog.csdn.net/weixin_64809211/article/details/134398783

版权

import math

def create_tree(data, attributes, max_depth=float('inf'), min_samples=1, depth=0):
    """Recursively build an ID3-style decision tree from labelled samples.

    Every sample is an indexable sequence whose last element is the class
    label; ``attributes`` lists the indices that may still be split on.

    Args:
        data: Sequence of samples (feature values followed by the label).
        attributes: Attribute indices still available for splitting.
        max_depth: Depth at which recursion stops and a majority-class leaf
            is returned.
        min_samples: A branch holding fewer samples than this becomes a
            majority-class leaf instead of being split further.
        depth: Depth of the current node; external callers leave it at 0.

    Returns:
        ``None`` for empty data, a bare label for a leaf, or a nested dict
        of the form ``{attribute: {value: subtree}}``.
    """
    if len(data) == 0:
        return None
    if all_same(data):
        return data[0][-1]
    # ``attributes`` holds feature indices only (the label is read from
    # sample[-1]), so splitting is exhausted when the list is empty -- not
    # when one attribute is left, which would leave that attribute unused.
    if len(attributes) == 0 or depth >= max_depth:
        return majority_class(data)

    best_attribute = choose_best_attribute(data, attributes)
    if best_attribute is None:
        # No usable attribute was selected; indexing samples with None would
        # raise, so fall back to a majority-class leaf.
        return majority_class(data)

    # The remaining attributes are identical for every branch, so build the
    # list once.  Only the first occurrence is removed, as list.remove does.
    sub_attributes = list(attributes)
    sub_attributes.remove(best_attribute)

    branches = {}
    for value in get_attribute_values(data, best_attribute):
        sub_data = get_sub_data(data, best_attribute, value)
        if len(sub_data) < min_samples:
            # Too few samples to justify another split.
            branches[value] = majority_class(sub_data)
        else:
            branches[value] = create_tree(
                sub_data, sub_attributes, max_depth, min_samples, depth + 1
            )
    return {best_attribute: branches}

def all_same(data):
    """Return True when every sample in ``data`` carries the same label.

    The label is the last element of each sample.  Empty input has no
    conflicting labels, so it yields True instead of raising IndexError.
    """
    if len(data) == 0:
        return True
    first_label = data[0][-1]
    return all(sample[-1] == first_label for sample in data)

def majority_class(data):
    """Return the most frequent label (last element) among the samples.

    Ties are resolved in favour of the label that was seen first, because
    ``max`` returns the first maximal key and dicts keep insertion order.
    Returns ``None`` for empty ``data`` instead of raising ValueError.
    """
    label_count = {}
    for sample in data:
        label = sample[-1]
        label_count[label] = label_count.get(label, 0) + 1
    return max(label_count, key=label_count.get, default=None)

def choose_best_attribute(data, attributes):
    """Return the attribute whose split yields the largest information gain.

    When several attributes share the largest gain, the one listed first in
    ``attributes`` wins.  An attribute is returned even when the best gain
    is zero, so callers always receive a usable index for a non-empty
    ``attributes``; ``None`` is returned only when ``attributes`` is empty.
    """
    return max(
        attributes,
        key=lambda attribute: calculate_gain(data, attribute),
        default=None,
    )

def calculate_gain(data, attribute):
    """Return the information gain obtained by splitting ``data`` on ``attribute``.

    Starts from the entropy of the whole set and subtracts, for each distinct
    value of the attribute, the entropy of the matching subset weighted by
    that subset's share of the samples.
    """
    total = len(data)
    info_gain = calculate_entropy(data)
    for branch_value in get_attribute_values(data, attribute):
        branch = get_sub_data(data, attribute, branch_value)
        weight = len(branch) / total
        info_gain -= weight * calculate_entropy(branch)
    return info_gain

def calculate_entropy(data):
    """Return the base-2 Shannon entropy of the labels in ``data``.

    The label of a sample is its last element.  Empty input yields 0.
    """
    tally = {}
    for row in data:
        tally[row[-1]] = tally.get(row[-1], 0) + 1

    total = len(data)
    result = 0
    for occurrences in tally.values():
        fraction = occurrences / total
        result -= fraction * math.log(fraction, 2)
    return result

def get_attribute_values(data, attribute):
    """Return the distinct values found at index ``attribute``.

    Values are listed in order of first appearance across the samples.
    """
    distinct = []
    for row in data:
        candidate = row[attribute]
        if candidate in distinct:
            continue
        distinct.append(candidate)
    return distinct

def get_sub_data(data, attribute, value):
    """Return the samples whose entry at index ``attribute`` equals ``value``.

    The result is a new list that holds the original sample objects in their
    original order.
    """
    return [row for row in data if row[attribute] == value]

def classify(tree, sample):
    """Walk ``tree`` for ``sample`` and return the label stored at the leaf.

    Internal nodes are dicts of the form ``{attribute: {value: subtree}}``;
    anything that is not a dict is treated as a leaf and returned as is, so
    labels of any type (including ``None``) are supported, not just strings.

    Raises:
        KeyError: If the sample's value for a node's attribute has no branch
            in the tree.
    """
    node = tree
    while isinstance(node, dict):
        # Each internal node has a single key: the attribute it splits on.
        attribute = next(iter(node))
        node = node[attribute][sample[attribute]]
    return node

def accuracy(tree, data):
    """Return the fraction of samples in ``data`` that ``tree`` labels correctly.

    A prediction is correct when ``classify(tree, sample)`` equals the
    sample's last element.  Empty ``data`` yields 0.0 instead of raising
    ZeroDivisionError.
    """
    if len(data) == 0:
        return 0.0
    correct_count = sum(
        1 for sample in data if classify(tree, sample) == sample[-1]
    )
    return correct_count / len(data)

# Samples used to build the tree.  Each row holds six categorical feature
# values followed by the class label ('好瓜' or '坏瓜').
# NOTE(review): the columns look like the textbook watermelon dataset
# (colour, root, knock sound, texture, navel, touch) -- confirm.
data1 = [
    ['青绿', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '好瓜'],
    ['乌黑', '蜷缩', '沉闷', '清晰', '凹陷', '硬滑', '好瓜'],
    ['乌黑', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '好瓜'],
    ['青绿', '稍蜷', '浊响', '清晰', '稍凹', '软粘', '好瓜'],
    ['乌黑', '稍蜷', '浊响', '稍糊', '稍凹', '软粘', '好瓜'],
    ['乌黑', '稍蜷', '沉闷', '稍糊', '稍凹', '硬滑', '坏瓜'],
    ['青绿', '硬挺', '清脆', '清晰', '平坦', '软粘', '坏瓜'],
    ['浅白', '稍蜷', '沉闷', '稍糊', '凹陷', '硬滑', '坏瓜'],
    ['乌黑', '稍蜷', '浊响', '清晰', '稍凹', '软粘', '坏瓜'],
    ['浅白', '蜷缩', '浊响', '模糊', '平坦', '硬滑', '坏瓜'],
    ['青绿', '蜷缩', '沉闷', '稍糊', '稍凹', '硬滑', '坏瓜'],
]

# Samples that are classified and scored; same row layout as data1.
data2 = [
    ['青绿', '蜷缩', '沉闷', '清晰', '凹陷', '硬滑', '好瓜'],
    ['浅白', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '好瓜'],
    ['乌黑', '稍蜷', '浊响', '清晰', '稍凹', '硬滑', '好瓜'],
    ['乌黑', '稍蜷', '沉闷', '稍糊', '稍凹', '硬滑', '坏瓜'],
    ['浅白', '硬挺', '清脆', '模糊', '平坦', '硬滑', '坏瓜'],
    ['浅白', '蜷缩', '浊响', '模糊', '平坦', '软粘', '坏瓜'],
    ['青绿', '稍蜷', '浊响', '稍糊', '凹陷', '硬滑', '坏瓜'],
]

# Indices of the feature columns; the label column (index 6) is excluded.
attributes = [0, 1, 2, 3, 4, 5]

# Tree-growth limits passed to create_tree.
max_depth = 4
min_samples = 3

# Build the tree from data1 using the configured growth limits.
tree = create_tree(
    data1,
    attributes,
    max_depth=max_depth,
    min_samples=min_samples,
)
print("决策树:", tree)

# Classify every sample in data2 and print the prediction next to it.
print("分类结果:")
for sample in data2:
    result = classify(tree, sample)
    print(sample, "->", result)

# Fraction of data2 samples whose prediction matches their label.
print("分类精度:", accuracy(tree, data2))

白羊杆菌

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
2
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫