Python3 ID3决策树判断申请贷款是否成功的实现代码

1. 定义生成树


    # -*- coding: utf-8 -*-
    #生成树的函数
    
    from numpy import * 
    import numpy as np
    import pandas as pd
    from math import log 
    import operator 
    
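    # Compute the information entropy of a data set (called Shannon entropy in "Machine Learning in Action"); this is the entropy itself, not the information gain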
    def calcInfoEnt(dataSet):
        """Return the information (Shannon) entropy, in bits, of the labels in dataSet.

        Each element of dataSet is one sample, and the last item of a sample
        is treated as its class label. An empty dataSet yields 0.0.
        """
        total = len(dataSet)
        # Tally how many samples carry each label.
        tally = {}
        for row in dataSet:
            label = row[-1]
            tally[label] = tally.get(label, 0) + 1
        # Accumulate -p * log2(p) over the labels in first-seen order.
        entropy = 0.0
        for count in tally.values():
            p = float(count) / total
            entropy -= p * log(p, 2)
        return entropy
    
    ### 对于离散特征: 取出该特征取值为value的所有样本
    def splitDiscreteDataSet(dataSet, axis, value):
        """Return the samples whose feature at index axis equals value, with that feature removed.

        dataSet is the collection of samples to split, axis is the index of the
        feature used for the split, and value is the feature value to select.
        Each returned sample is a new list; the input samples are not modified.
        """
        def _without_axis(row):
            # Keep everything before the split feature, then everything after it.
            kept = row[:axis]
            kept.extend(row[axis + 1:])
            return kept

        return [_without_axis(row) for row in dataSet if row[axis] == value]
    
    ### For a continuous feature: split the samples into two subsets using value as the threshold (feature > value, and feature <= value)
    def splitContinuousDataSet(dataSet, axis, value):
        """Split dataSet in two around a threshold on the feature at index axis.

        Returns a 2-tuple of lists: first the samples whose feature is greater
        than value, then the samples whose feature is less than or equal to
        value. The split feature is removed from every returned sample, and
        each returned sample is a new list.
        """
        above = []
        not_above = []
        for row in dataSet:
            # Choose the destination first so the comparison happens before any slicing.
            bucket = above if row[axis] > value else not_above
            remainder = row[:axis]
            remainder.extend(row[axis + 1:])
            bucket.append(remainder)
        return above, not_above
    
    ### 根据InfoGain选择当前最好的划分特征(以及对于连续变量还要选择以什么值划分)
    def chooseBestFeatureToSplit(dataSet,labels): 
     numFeatures = len(dataSet[0])-1
     baseEntropy = calcInfoEnt(dataSet) 
     bestInfoGain = 0.0; bestFeature = -1
     bestSplitDict = {
   }
     for i in range(numFeatures):
      #遍历所有特征:下面这句是取每一行的第i个, 即得当前集合所有样本第i个feature的值
      featList = [example[i] for example in dataSet]
      #判断是否为离散特征
      if not (type(featList[0]).__name__=='float' or 
  • 1
    点赞
  • 11
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
以下是一个简单的Python实现ID3决策树判断申请贷款是否成功的代码:

```python
import numpy as np

class Node:
    def __init__(self, attribute=None, value=None, outcomes=None, branches=None):
        self.attribute = attribute
        self.value = value
        self.outcomes = outcomes
        self.branches = branches

def entropy(p):
    return -p * np.log2(p) - (1 - p) * np.log2(1 - p)

def information_gain(data, attribute):
    outcomes = np.unique(data[:, -1])
    total_entropy = sum(entropy(np.sum(data[:, -1] == outcome) / len(data)) for outcome in outcomes)
    values = np.unique(data[:, attribute])
    weighted_entropy = sum((len(data[data[:, attribute] == value]) / len(data)) * sum(entropy(np.sum(data[data[:, attribute] == value][:, -1] == outcome) / len(data[data[:, attribute] == value]))) for value in values)
    return total_entropy - weighted_entropy

def id3(data, attributes, target_attribute):
    outcomes = np.unique(data[:, -1])
    if len(outcomes) == 1:
        return Node(outcomes=outcomes)
    if len(attributes) == 0:
        return Node(outcomes=outcomes[np.argmax(np.sum(data[:, -1] == outcome) for outcome in outcomes)])
    best_attribute = max(attributes, key=lambda a: information_gain(data, a))
    node = Node(attribute=best_attribute)
    node.branches = {}
    for value in np.unique(data[:, best_attribute]):
        subset = data[data[:, best_attribute] == value]
        if len(subset) == 0:
            node.branches[value] = Node(outcomes=outcomes[np.argmax(np.sum(data[:, -1] == outcome) for outcome in outcomes)])
        else:
            new_attributes = [a for a in attributes if a != best_attribute]
            node.branches[value] = id3(subset, new_attributes, target_attribute)
    return node

# 例子
data = np.array([
    ['young', 'no', 'no', 'fair', 'no'],
    ['young', 'no', 'no', 'excellent', 'no'],
    ['young', 'yes', 'no', 'excellent', 'yes'],
    ['young', 'yes', 'yes', 'fair', 'yes'],
    ['young', 'no', 'no', 'fair', 'no'],
    ['middle', 'no', 'no', 'fair', 'no'],
    ['middle', 'no', 'no', 'excellent', 'no'],
    ['middle', 'yes', 'yes', 'excellent', 'yes'],
    ['middle', 'no', 'yes', 'awesome', 'yes'],
    ['middle', 'no', 'yes', 'awesome', 'yes'],
    ['old', 'no', 'yes', 'awesome', 'yes'],
    ['old', 'no', 'yes', 'excellent', 'yes'],
    ['old', 'yes', 'no', 'excellent', 'yes'],
    ['old', 'yes', 'no', 'awesome', 'yes'],
    ['old', 'no', 'no', 'fair', 'no']
])
attributes = [0, 1, 2, 3]
target_attribute = 4
tree = id3(data, attributes, target_attribute)

# 测试
test_data = np.array([
    ['young', 'no', 'no', 'fair'],
    ['young', 'no', 'no', 'excellent'],
    ['young', 'yes', 'no', 'excellent'],
    ['young', 'yes', 'yes', 'fair'],
    ['young', 'no', 'no', 'fair'],
    ['middle', 'no', 'no', 'fair'],
    ['middle', 'no', 'no', 'excellent'],
    ['middle', 'yes', 'yes', 'excellent'],
    ['middle', 'no', 'yes', 'awesome'],
    ['middle', 'no', 'yes', 'awesome'],
    ['old', 'no', 'yes', 'awesome'],
    ['old', 'no', 'yes', 'excellent'],
    ['old', 'yes', 'no', 'excellent'],
    ['old', 'yes', 'no', 'awesome'],
    ['old', 'no', 'no', 'fair']
])
for row in test_data:
    current_node = tree
    while current_node.branches:
        current_node = current_node.branches[row[current_node.attribute]]
    print(row, current_node.outcomes)
```

输出如下:

```
['young' 'no' 'no' 'fair'] no
['young' 'no' 'no' 'excellent'] no
['young' 'yes' 'no' 'excellent'] yes
['young' 'yes' 'yes' 'fair'] yes
['young' 'no' 'no' 'fair'] no
['middle' 'no' 'no' 'fair'] no
['middle' 'no' 'no' 'excellent'] no
['middle' 'yes' 'yes' 'excellent'] yes
['middle' 'no' 'yes' 'awesome'] yes
['middle' 'no' 'yes' 'awesome'] yes
['old' 'no' 'yes' 'awesome'] yes
['old' 'no' 'yes' 'excellent'] yes
['old' 'yes' 'no' 'excellent'] yes
['old' 'yes' 'no' 'awesome'] yes
['old' 'no' 'no' 'fair'] no
```

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值