# -*- coding: utf-8 -*-
# tree.py template

#
# Copyright (C) 2010-2013 Renstech Ltd. All Rights Reserved
#
# This file is part of supernova.
#
# Authors:
# 'zhouyifeng'

# pylint: disable-msg=E1101

import json


class Node(object):
    """A named tree node carrying a value, display metadata and ordered children.

    ``level`` and ``path`` are derived from the position in the tree and are
    (re)computed whenever the node is attached to a parent with
    :meth:`add_node` or :meth:`insert_node`.
    """

    def __init__(self, name, value, level=0):
        """
        Create a detached node.

        :param name: node name; also the initial ``path``
        :param value: payload used by :meth:`find_node` and :meth:`sort`
        :param level: depth of the node, 0 for a root
        """
        self.name = name
        self.value = value
        self.level = level
        self.url = None
        self.target = '_self'
        self.open = False
        self.path = name
        self.icon = None
        self.iconClose = None
        self.iconOpen = None
        self.parent = None
        self.children = []

    @property
    def isParent(self):
        """True when the node has at least one child."""
        return len(self.children) > 0

    def _refresh_descendants(self):
        """Recompute ``level`` and ``path`` of every descendant from this node."""
        for child in self.children:
            child.level = self.level + 1
            child.path = self.path + '/' + child.name
            child._refresh_descendants()

    def _adopt(self, node):
        """Make ``node`` positionally consistent with being a child of self."""
        node.level = self.level + 1
        node.parent = self
        node.path = self.path + '/' + node.name
        # A subtree may have been built before being attached here; without
        # this its descendants would keep the level/path of the old position.
        node._refresh_descendants()

    def add_node(self, node):
        """
        Append a child node.

        :param node: the child node to add
        :type node: Node
        """
        self._adopt(node)
        self.children.append(node)

    def insert_node(self, index, node):
        """
        Insert a child node before position ``index``.

        :param index: position passed to ``list.insert``
        :param node: the child node to insert
        :type node: Node
        """
        self._adopt(node)
        self.children.insert(index, node)

    def remove_node(self, node):
        """
        Remove a direct child.

        :raises ValueError: if ``node`` is not a direct child
        """
        self.children.remove(node)

    def pop_node(self):
        """
        Remove the last direct child and return it.

        :raises IndexError: if there are no children
        """
        return self.children.pop()

    def clear(self):
        """Drop all children."""
        self.children = []

    def get_node(self, name):
        """Return the first direct child named ``name``, or None."""
        for child in self.children:
            if child.name == name:
                return child

        return None

    def find_node(self, value):
        """Depth-first search of the descendants by ``value``; None if absent."""
        for child in self.children:
            if child.value == value:
                return child

            node = child.find_node(value)
            if node is not None:
                return node

        return None

    def search_node(self, name):
        """Depth-first search of the descendants by ``name``; None if absent."""
        for child in self.children:
            if child.name == name:
                return child

            node = child.search_node(name)
            if node is not None:
                return node

        return None

    def sort(self):
        """Sort the children by ``value``, recursively, in place."""
        # key= instead of a cmp function: works on Python 2 and Python 3.
        self.children.sort(key=lambda child: child.value)
        for child in self.children:
            child.sort()

    def sort_by_name(self):
        """Sort the children by ``name``, recursively, in place."""
        self.children.sort(key=lambda child: child.name)
        for child in self.children:
            child.sort_by_name()

    def show(self, layer=0):
        """Print the subtree's names, indented two spaces per level."""
        print("  " * layer + self.name)
        # Explicit loop: map() is lazy on Python 3 and would print nothing.
        for child in self.children:
            child.show(layer + 1)

    def show_value(self, layer=0):
        """Print the subtree's values, indented two spaces per level."""
        # %-formatting so that non-string values can be printed as well.
        print("%s%s" % ("  " * layer, self.value))
        for child in self.children:
            child.show_value(layer + 1)

    def to_dict(self):
        """
        Return the subtree as nested plain dicts.

        ``url``/``target``, the icon keys and ``children`` are only emitted
        when they are set / non-empty.
        """
        result = {'name': self.name,
                  'value': self.value,
                  'level': self.level,
                  'open': self.open,
                  'path': self.path}

        if self.url:
            result['url'] = self.url
            result['target'] = self.target
        if self.icon:
            result['icon'] = self.icon
        if self.iconClose:
            result['iconClose'] = self.iconClose
        if self.iconOpen:
            result['iconOpen'] = self.iconOpen
        if self.children:
            result['children'] = [child.to_dict() for child in self.children]

        return result

    def to_json(self):
        """Return :meth:`to_dict` serialized as JSON with a 4-space indent."""
        return json.dumps(self.to_dict(), indent=4)

    def __unicode__(self):
        return self.name

    def __str__(self):
        return self.name


class Tree(object):
    """An ordered collection of top-level nodes (a forest) with an optional name."""

    def __init__(self, name=None):
        """
        :param name: optional display name of the tree
        """
        self.name = name
        self.nodes = []

    def to_json(self):
        """Return the top-level nodes' ``to_dict()`` output as a JSON array."""
        return json.dumps([node.to_dict() for node in self.nodes], indent=4)

    def add_node(self, node):
        """
        Append a top-level node to the tree.

        The node's ``level`` is reset to 0 and its ``parent`` is set to this
        tree object.

        :param node: the node to add
        :type node: Node
        """

        node.level = 0
        node.parent = self
        self.nodes.append(node)

    def insert_node(self, index, node):
        """
        Insert a top-level node before position ``index``.

        :param index: position passed to ``list.insert``
        :param node: the node to insert
        :type node: Node
        """
        node.level = 0
        node.parent = self
        self.nodes.insert(index, node)

    def remove_node(self, node):
        """
        Remove a top-level node.

        :raises ValueError: if ``node`` is not a top-level node of this tree
        """
        self.nodes.remove(node)

    def pop(self):
        """
        Remove the last top-level node and return it.

        :raises IndexError: if the tree is empty
        """
        return self.nodes.pop()

    def sort(self):
        """Sort the nodes by ``value``, recursively, in place."""
        # key= instead of a cmp function: works on Python 2 and Python 3.
        self.nodes.sort(key=lambda node: node.value)
        for node in self.nodes:
            node.sort()

    def sort_by_name(self):
        """Sort the nodes by ``name``, recursively, in place."""
        self.nodes.sort(key=lambda node: node.name)
        for node in self.nodes:
            node.sort_by_name()

    def get_node(self, name):
        """Return the first top-level node named ``name``, or None."""
        for node in self.nodes:
            if node.name == name:
                return node

        return None

    def find_node(self, value):
        """Depth-first search of the whole tree by ``value``; None if absent."""
        for node in self.nodes:
            if node.value == value:
                return node

            child_node = node.find_node(value)
            if child_node is not None:
                return child_node

        return None

    def search_node(self, name):
        """Depth-first search of the whole tree by ``name``; None if absent."""
        for node in self.nodes:
            if node.name == name:
                return node

            child_node = node.search_node(name)
            if child_node is not None:
                return child_node

        return None

    def __unicode__(self):
        # name defaults to None; returning None from here raises TypeError.
        return self.name if self.name is not None else ''

    def __str__(self):
        # name defaults to None; returning None from here raises TypeError.
        return self.name if self.name is not None else ''


if __name__ == '__main__':
    # Demo: build a small two-level tree, dump it as JSON before and after
    # sorting by value, then look up one existing and one missing value.
    root = Node("root", "root")

    sub_node1 = Node("a1", "a1")
    root.add_node(sub_node1)
    sub_node1.add_node(Node("a2", "a2"))
    sub_node1.add_node(Node("b2", "b2"))

    sub_node2 = Node("b1", "b1")
    root.add_node(sub_node2)
    sub_node2.add_node(Node("d2", "d2"))
    sub_node2.add_node(Node("c2", "c2"))

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(root.to_json())

    root.sort()
    print(root.to_json())

    # "c2" exists in the tree, "c3" does not.
    for wanted in ("c2", "c3"):
        node = root.find_node(wanted)
        if node is not None:
            print(node)
        else:
            print("not find")
好的,让我为你解答一下这个问题。

首先,你需要加载数据集:iris 数据集和 boston 数据集都可以从 sklearn.datasets 中导入。导入数据集的代码如下:

```python
from sklearn.datasets import load_iris, load_boston

iris = load_iris()
X_iris = iris.data
y_iris = iris.target

boston = load_boston()
X_boston = boston.data
y_boston = boston.target
```

接下来,你需要将数据集划分为训练集和测试集。可以使用 sklearn.model_selection 中的 train_test_split 函数来实现。代码如下:

```python
from sklearn.model_selection import train_test_split

X_iris_train, X_iris_test, y_iris_train, y_iris_test = train_test_split(X_iris, y_iris, test_size=0.2, random_state=42)
X_boston_train, X_boston_test, y_boston_train, y_boston_test = train_test_split(X_boston, y_boston, test_size=0.2, random_state=42)
```

接下来,你可以使用 numpy 库来实现 CART 分类/回归树算法。参考程序模板:cart_numpy_template.py。在实现过程中,你可以加入 TN 样本数量阈值和 TG 基尼指数阈值作为终止条件,以提高模型的泛化能力。对于离散特征的分枝,可以使用决策树的信息增益或基尼指数来进行特征选择。这里只给出 CART 分类树的实现代码示例:

```python
import numpy as np


class CARTClassifier:
    def __init__(self, min_samples_leaf=1, min_impurity_decrease=0.0):
        self.min_samples_leaf = min_samples_leaf
        self.min_impurity_decrease = min_impurity_decrease

    def fit(self, X, y):
        self.n_classes_ = len(np.unique(y))
        self.tree_ = self._build_tree(X, y)

    def predict(self, X):
        return np.array([self._predict(inputs) for inputs in X])

    def _build_tree(self, X, y):
        if len(y) == 0:
            return None
        n_samples, n_features = X.shape
        n_labels = len(np.unique(y))
        if n_labels == 1:
            return {'leaf': True, 'class': y[0]}
        if n_samples < self.min_samples_leaf:
            return {'leaf': True, 'class': np.argmax(np.bincount(y))}
        best_feature, best_threshold, best_impurity = self._best_split(X, y)
        if best_impurity == 0:
            return {'leaf': True, 'class': np.argmax(np.bincount(y))}
        left_idx = np.where(X[:, best_feature] <= best_threshold)[0]
        right_idx = np.where(X[:, best_feature] > best_threshold)[0]
        left_tree = self._build_tree(X[left_idx, :], y[left_idx])
        right_tree = self._build_tree(X[right_idx, :], y[right_idx])
        return {'leaf': False, 'feature': best_feature, 'threshold': best_threshold,
                'left': left_tree, 'right': right_tree}

    def _best_split(self, X, y):
        best_feature = None
        best_threshold = None
        best_impurity = np.inf
        for feature_idx in range(X.shape[1]):
            thresholds = np.unique(X[:, feature_idx])
            for threshold in thresholds:
                y_left = y[X[:, feature_idx] <= threshold]
                y_right = y[X[:, feature_idx] > threshold]
                impurity = self._gini_impurity(y_left, y_right)
                if impurity < best_impurity:
                    best_feature = feature_idx
                    best_threshold = threshold
                    best_impurity = impurity
        if best_impurity < self.min_impurity_decrease:
            return None, None, 0
        return best_feature, best_threshold, best_impurity

    def _gini_impurity(self, y_left, y_right):
        p_l = len(y_left) / (len(y_left) + len(y_right))
        p_r = 1 - p_l
        gini_l = 1 - sum((np.bincount(y_left) / len(y_left)) ** 2)
        gini_r = 1 - sum((np.bincount(y_right) / len(y_right)) ** 2)
        return p_l * gini_l + p_r * gini_r

    def _predict(self, inputs):
        node = self.tree_
        while not node['leaf']:
            if inputs[node['feature']] <= node['threshold']:
                node = node['left']
            else:
                node = node['right']
        return node['class']
```

最后,你可以输出树模型,进行预测,并评估模型性能。注意:上文只给出了 CARTClassifier 的实现,下面示例中用到的 CARTRegressor 需要按同样的思路(例如以均方误差代替基尼指数)自行实现。代码如下:

```python
clf = CARTClassifier()
clf.fit(X_iris_train, y_iris_train)
print(clf.tree_)
y_iris_pred = clf.predict(X_iris_test)
iris_accuracy = np.sum(y_iris_pred == y_iris_test) / len(y_iris_test)
print('Iris accuracy:', iris_accuracy)

clf = CARTRegressor()
clf.fit(X_boston_train, y_boston_train)
print(clf.tree_)
y_boston_pred = clf.predict(X_boston_test)
boston_mse = np.mean((y_boston_pred - y_boston_test) ** 2)
print('Boston MSE:', boston_mse)
```

参考资料:

1. scikit-learn 官方文档:https://scikit-learn.org/stable/modules/tree.html
2. CART算法原理与实现:https://www.cnblogs.com/Jie-Meng/p/decisiion_tree_c4.5_CART.html
3. 决策树算法详解:https://www.jianshu.com/p/6bfcfc61a6c0
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

忆枫717

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值