【机器学习】手动实现分类决策树 (不用scikit-learn)
不用scikit-learn(sklearn)库,一步一步实现分类决策树,外加可视化哦~
- 树结构:二叉树结构+决策节点+叶子节点
- 度量:entropy或gini
- 构造树
- 读入数据:车辆轮廓分类(数据量:846,特征维度:18)
数据来源:CSDN - 一些用于聚类和分类问题的数据集 分类数据35
- 训练
- 测试
下面就开始啦
树结构
二叉树结构
from graphviz import Digraph
import uuid
from random import sample
class BinaryTree:
def __init__(self, rootObj):
    """Create a single-node tree whose payload is ``rootObj``.

    Both child links start out empty. Each node also owns a graphviz
    ``Digraph`` (stored in ``self.dot``), which ``print_tree`` draws into.
    """
    self.key = rootObj
    # A fresh node has no subtrees on either side.
    self.leftChild = self.rightChild = None
    self.dot = Digraph(comment='Binary Tree')
def insertLeft(self, newNode):
    """Attach ``newNode`` as the left child of this node.

    If a left child already exists, it is pushed one level down and becomes
    the left child of ``newNode`` (overwriting whatever ``newNode.leftChild``
    held before).
    """
    displaced = self.leftChild
    if displaced is not None:
        # Keep the old subtree reachable by hanging it under the newcomer.
        newNode.leftChild = displaced
    self.leftChild = newNode
def insertRight(self, newNode):
    """Attach ``newNode`` as the right child of this node.

    If a right child already exists, it is pushed one level down and becomes
    the right child of ``newNode`` (overwriting whatever
    ``newNode.rightChild`` held before).
    """
    displaced = self.rightChild
    if displaced is not None:
        # Keep the old subtree reachable by hanging it under the newcomer.
        newNode.rightChild = displaced
    self.rightChild = newNode
# 可视化,输出到Binary_Tree.gv
def print_tree(self, save_path='./Binary_Tree.gv', label=True):
# colors for labels of nodes
colors = ['skyblue', 'tomato', 'orange', 'purple', 'green', 'yellow', 'pink', 'red']
# Draws a binary tree with a node as its root
def print_node(node, node_tag):
# The node color
color = sample(colors, 1)[0]
if node.leftChild is not None:
if type(node.leftChild.key).__name__ == 'DecisionNode': # DecisionNode
left_tag = str(uuid.uuid1()) # Data for the left node
self.dot.node(left_tag,
str(node.leftChild.key.feature_name + " <= " + str(
node.leftChild.key.feature_value) + " ? "),
style='filled', color=color) # left
label_string = 'Y' if label else '' # Whether to label the connector indicates a left subtree
self.dot.edge(node_tag, left_tag, label=label_string) # The line between the left child and parent
print_node(node.leftChild, left_tag)
else: # leaf
left_tag = str(uuid.uuid1())
self.dot.node(left_tag, str(
"samples = " + str(node.leftChild.key.num) + "\n value = " + str(node.leftChild.key.value)),
style='filled', color=color)
label_string = 'Y' if label else ''
self.dot.edge(node_tag, left_tag, label=label_string)
print_node(node.leftChild, left_tag)
if node.rightChild is not None:
if type(node.rightChild.key).__name__ == 'DecisionNode'

最低0.47元/天 解锁文章
2474

被折叠的 条评论
为什么被折叠?



