from math import log
import operator
import pandas as pd
import numpy as np
def calcShannonEnt(dataSet):
    """Return the Shannon entropy (in bits) of the class labels in *dataSet*.

    Args:
        dataSet: A sized iterable of rows (it must support ``len()``). The
            last element of each row (``row[-1]``) is taken as that row's
            class label; labels must be hashable.

    Returns:
        The entropy ``-sum(p * log2(p))`` over the label frequencies, as a
        float. An empty data set yields ``0.0``.
    """
    numEntries = len(dataSet)  # total number of rows

    # Count how many rows carry each class label (insertion-ordered).
    labelCounts = {}
    for featVec in dataSet:
        currentLabel = featVec[-1]  # last column holds the class label
        labelCounts[currentLabel] = labelCounts.get(currentLabel, 0) + 1

    # Start from a float so the return type does not depend on the input
    # (an empty data set skips the loop below entirely).
    shannonEnt = 0.0
    for count in labelCounts.values():
        prob = count / numEntries  # relative frequency of this class
        shannonEnt -= prob * log(prob, 2)  # accumulate this class's term
    return shannonEnt
def createDataSet1(): # 创造数据集
dataSet = [['<=30', 'high', 'no', 'fair', 'no'],
['<=30', 'high', 'no', 'excellent', 'no'],
['31…40', 'high', 'no', 'fair'
决策树买不买电脑
最新推荐文章于 2022-04-23 22:12:07 发布