信息学奥赛一本通 1336: Finding the Tree Root and Its Children (Graph Algorithms: Trees), Detailed Explanation

Decision trees can be used for both classification and regression tasks. Implementations of the decision tree algorithm for each task are given below.

Classification decision tree implementation:

```python
import numpy as np

class Node:
    def __init__(self, feature_index=None, threshold=None, left=None, right=None, info_gain=None, value=None):
        self.feature_index = feature_index  # index of the feature to split on
        self.threshold = threshold          # split threshold
        self.left = left                    # left subtree
        self.right = right                  # right subtree
        self.info_gain = info_gain          # information gain of the split
        self.value = value                  # class label for leaf nodes

class DecisionTreeClassifier:
    def __init__(self, max_depth=None, min_samples_split=2):
        self.max_depth = max_depth                  # maximum tree depth
        self.min_samples_split = min_samples_split  # minimum samples required to split
        self.root = None                            # root node of the decision tree

    def fit(self, X, y):
        self.root = self._grow_tree(X, y)

    def _grow_tree(self, X, y, depth=0):
        n_samples, n_features = X.shape
        n_labels = len(np.unique(y))
        # Return a leaf if the node is pure, has too few samples,
        # or the maximum depth has been reached
        if (n_labels == 1) or (n_samples < self.min_samples_split) or \
           (self.max_depth is not None and depth >= self.max_depth):
            return Node(value=self._most_common_label(y))
        # Evaluate the information gain of every feature and pick the best split
        best_feature, best_threshold, best_info_gain = self._best_criteria(X, y)
        # Return a leaf if no split improves on the parent
        # (this also covers the case where no valid split exists at all)
        if best_info_gain <= 0:
            return Node(value=self._most_common_label(y))
        # Recursively grow the left and right subtrees
        left_indices, right_indices = self._split(X[:, best_feature], best_threshold)
        left = self._grow_tree(X[left_indices, :], y[left_indices], depth + 1)
        right = self._grow_tree(X[right_indices, :], y[right_indices], depth + 1)
        return Node(best_feature, best_threshold, left, right, best_info_gain)

    def _best_criteria(self, X, y):
        best_info_gain = -1
        best_feature = None
        best_threshold = None
        # Search all features and all candidate thresholds for the best split
        for feature_index in range(X.shape[1]):
            feature_values = X[:, feature_index]
            thresholds = np.unique(feature_values)
            for threshold in thresholds:
                info_gain = self._information_gain(y, feature_values, threshold)
                if info_gain > best_info_gain:
                    best_info_gain = info_gain
                    best_feature = feature_index
                    best_threshold = threshold
        return best_feature, best_threshold, best_info_gain

    def _information_gain(self, y, X_feature, threshold):
        parent_entropy = self._entropy(y)
        left_indices, right_indices = self._split(X_feature, threshold)
        if len(left_indices) == 0 or len(right_indices) == 0:
            return 0
        n = len(y)
        n_l, n_r = len(left_indices), len(right_indices)
        e_l, e_r = self._entropy(y[left_indices]), self._entropy(y[right_indices])
        child_entropy = (n_l / n) * e_l + (n_r / n) * e_r
        return parent_entropy - child_entropy

    def _entropy(self, y):
        n_labels = len(np.unique(y))
        if n_labels <= 1:
            return 0
        counts = np.bincount(y)
        probabilities = counts / len(y)
        return -np.sum([p * np.log2(p) for p in probabilities if p > 0])

    def _split(self, X_feature, threshold):
        left_indices = np.argwhere(X_feature <= threshold).flatten()
        right_indices = np.argwhere(X_feature > threshold).flatten()
        return left_indices, right_indices

    def _most_common_label(self, y):
        return np.bincount(y).argmax()

    def predict(self, X):
        return np.array([self._traverse_tree(x, self.root) for x in X])

    def _traverse_tree(self, x, node):
        if node.value is not None:
            return node.value
        if x[node.feature_index] <= node.threshold:
            return self._traverse_tree(x, node.left)
        return self._traverse_tree(x, node.right)
```
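To sanity-check the classifier, here is a minimal usage sketch. The toy dataset below is invented for illustration and is not from the original post; since `_most_common_label` and `_entropy` rely on `np.bincount`, class labels are assumed to be non-negative integers.

```python
import numpy as np

# Invented toy dataset: two features, integer labels in {0, 1}.
# np.bincount inside the tree requires labels to be non-negative integers.
X = np.array([[2.0, 3.0],
              [1.0, 1.0],
              [3.0, 2.0],
              [6.0, 5.0],
              [7.0, 8.0],
              [8.0, 6.0]])
y = np.array([0, 0, 0, 1, 1, 1])

clf = DecisionTreeClassifier(max_depth=3)
clf.fit(X, y)
print(clf.predict(np.array([[2.5, 2.5], [7.5, 7.0]])))  # expected: [0 1]
```

On this data the root split lands on feature 0 at threshold 3.0, which separates the two classes perfectly, so both children become pure leaves.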
Regression decision tree implementation:

```python
import numpy as np

class Node:
    def __init__(self, feature_index=None, threshold=None, left=None, right=None, value=None):
        self.feature_index = feature_index  # index of the feature to split on
        self.threshold = threshold          # split threshold
        self.left = left                    # left subtree
        self.right = right                  # right subtree
        self.value = value                  # predicted value for leaf nodes

class DecisionTreeRegressor:
    def __init__(self, max_depth=None, min_samples_split=2):
        self.max_depth = max_depth                  # maximum tree depth
        self.min_samples_split = min_samples_split  # minimum samples required to split
        self.root = None                            # root node of the decision tree

    def fit(self, X, y):
        self.root = self._grow_tree(X, y)

    def _grow_tree(self, X, y, depth=0):
        n_samples, n_features = X.shape
        # Return a leaf if there are too few samples or the maximum depth is reached
        if (n_samples < self.min_samples_split) or \
           (self.max_depth is not None and depth >= self.max_depth):
            return Node(value=np.mean(y))
        # Find the best feature/threshold by minimizing the weighted child MSE
        best_feature, best_threshold = self._best_criteria(X, y)
        # No valid split exists (e.g. all feature values are identical): return a leaf
        if best_feature is None:
            return Node(value=np.mean(y))
        # Recursively grow the left and right subtrees
        left_indices, right_indices = self._split(X[:, best_feature], best_threshold)
        left = self._grow_tree(X[left_indices, :], y[left_indices], depth + 1)
        right = self._grow_tree(X[right_indices, :], y[right_indices], depth + 1)
        return Node(best_feature, best_threshold, left, right)

    def _best_criteria(self, X, y):
        best_mse = np.inf
        best_feature = None
        best_threshold = None
        # Search all features and all candidate thresholds for the lowest weighted MSE
        for feature_index in range(X.shape[1]):
            feature_values = X[:, feature_index]
            thresholds = np.unique(feature_values)
            for threshold in thresholds:
                mse = self._mse(y, feature_values, threshold)
                if mse < best_mse:
                    best_mse = mse
                    best_feature = feature_index
                    best_threshold = threshold
        return best_feature, best_threshold

    def _mse(self, y, X_feature, threshold):
        left_indices, right_indices = self._split(X_feature, threshold)
        if len(left_indices) == 0 or len(right_indices) == 0:
            return np.inf
        y_left, y_right = y[left_indices], y[right_indices]
        # Sample-weighted variance of the two children
        return (len(y_left) * np.var(y_left) + len(y_right) * np.var(y_right)) / len(y)

    def _split(self, X_feature, threshold):
        left_indices = np.argwhere(X_feature <= threshold).flatten()
        right_indices = np.argwhere(X_feature > threshold).flatten()
        return left_indices, right_indices

    def predict(self, X):
        return np.array([self._traverse_tree(x, self.root) for x in X])

    def _traverse_tree(self, x, node):
        if node.value is not None:
            return node.value
        if x[node.feature_index] <= node.threshold:
            return self._traverse_tree(x, node.left)
        return self._traverse_tree(x, node.right)
```
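As a quick check of the regressor, the sketch below fits a noiseless step function; the data is invented for illustration. Unlike the classifier, the regressor has no zero-gain early stop, so it keeps splitting until `max_depth` or `min_samples_split` is hit; the `best_feature is None` guard above keeps it from crashing when no valid split exists.

```python
import numpy as np

# Invented 1-D regression data: y is a noiseless step function of the feature.
X = np.array([[1.0], [2.0], [3.0], [10.0], [11.0], [12.0]])
y = np.array([5.0, 5.0, 5.0, 20.0, 20.0, 20.0])

reg = DecisionTreeRegressor(max_depth=2)
reg.fit(X, y)
print(reg.predict(np.array([[2.5], [11.5]])))  # expected: [ 5. 20.]
```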
