代码说明:
函数:createDataSet():创建示例数据集及对应的特征标签
函数:calcShannonEnt(dataSet):求取熵
函数:splitDataSet(dataSet, axis, value):依据axis与value对数据集进行划分
函数:chooseBestFeatureToSplit(dataSet):根据信息增益,选出最适合划分的特征
def createDataSet():
    """Return the hard-coded sample data set and its feature labels.

    Returns:
        A ``(dataSet, labels)`` tuple.

        ``dataSet`` is a list of five rows. Each row is a list holding two
        0/1 integers followed by a ``'yes'``/``'no'`` string in the last
        position.

        ``labels`` is a list of two names, ``'no surfacing'`` and
        ``'flippers'``, presumably naming the two leading columns in order.

    A fresh list of fresh rows is built on every call, so callers may
    mutate the result without affecting later calls.
    """
    dataSet = [
        [1, 1, 'yes'],
        [1, 1, 'yes'],
        [1, 0, 'no'],
        [0, 1, 'no'],
        [0, 1, 'no'],
    ]
    labels = ['no surfacing', 'flippers']
    return dataSet, labels
def calcShannonEnt(dataSet):
numEntries = len(dataSet)
labelCounts = {}
for featVec in dataSet: #the the number of unique elements and their occurance
currentLabel = featVec[-1]
if currentLabel not in labelCounts.keys(): labelCounts[currentLabel] = 0
labelCounts[currentLabel] +&#