def createTree(dataSet, labels):
    """Recursively build a decision tree represented as nested dicts.

    Args:
        dataSet: Non-empty list of rows. The last element of every row is
            the class label; the elements before it are feature values.
        labels: Feature names, indexed in parallel with the feature
            positions of a row. This list is NOT modified; the chosen
            feature is removed from a private copy before recursing.

    Returns:
        A class label when the node is a leaf, otherwise a dict of the form
        ``{feature_label: {feature_value: subtree_or_class_label}}``.

    Raises:
        IndexError: If ``dataSet`` is empty.
    """
    classList = [example[-1] for example in dataSet]

    # Stop splitting when every row has the same class label.
    if classList.count(classList[0]) == len(classList):
        return classList[0]

    # Only the class column is left: all features are used up but the rows
    # are still not of a single class, so fall back to majorityCnt (defined
    # elsewhere; presumably returns the most frequent class).
    if len(dataSet[0]) == 1:
        return majorityCnt(classList)

    # Index of the feature selected for this split, and its name.
    bestFeat = chooseBestFeatureToSplit(dataSet)
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("bestFeat: %s" % (bestFeat,))
    bestFeatLabel = labels[bestFeat]
    print("bestFeatLabel: %s" % (bestFeatLabel,))

    # Drop the chosen feature name from a copy so the caller's list is left
    # intact (deleting from `labels` directly leaked the change outward).
    subLabels = list(labels)
    del subLabels[bestFeat]

    # Start building the tree: one branch per distinct value of the feature.
    myTree = {bestFeatLabel: {}}
    uniqueVals = set(example[bestFeat] for example in dataSet)
    for value in uniqueVals:
        # createTree never mutates its `labels` argument, so the same
        # subLabels list can safely be shared by every recursive call.
        # splitDataSet is defined elsewhere; presumably it returns the rows
        # whose bestFeat column equals `value`, with that column removed.
        myTree[bestFeatLabel][value] = createTree(
            splitDataSet(dataSet, bestFeat, value), subLabels)
    return myTree
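# Source: "Machine Learning in Action" (机器学习实战), code listing 3-4.
# (Blog page footer: latest recommended article published 2021-03-15 21:00:50.)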