from math import log
#以决策为标准计算信息熵
def calcShannonEnt(dataSet):
numEntries = len(dataSet)
labelCounts = {}
for featVec in dataSet:
currentLabel = featVec[-1]
if currentLabel not in labelCounts.keys():
labelCounts[currentLabel] = 0
labelCounts[currentLabel] += 1
shannonEnt = 0.0
for key in labelCounts:
prob = float(labelCounts[key])/numEntries
shannonEnt -= prob *log(prob,2)
return shannonEnt
def creatDataSet():
dataSet = [[1,1,'yes'],[1,1,'yes'],[1,0,'no'],[0,1,'no'],[0,1,'no']]
labels = ['no surfacing','flippers']
return dataSet,labels
def splitDataSet(dataSet,axis,value):
retDataSet = []#根据特征新建链表
for featVec in dataSet:
if featVec[axis] == value:
reducedFeatVec = featVec[:axis]
re
机器学习实战——决策树
最新推荐文章于 2018-08-29 12:01:30 发布