#coding=utf-8
from math import log
def calcShannonEnt(dataSet):
    """Return the Shannon entropy (in bits) of the class labels in dataSet.

    Args:
        dataSet: A sequence of rows. The last element of each row is taken
            as that row's class label; labels must be hashable because they
            are used as dictionary keys.

    Returns:
        The entropy as a float. An empty dataSet yields 0.0, since there
        are no labels to sum over.
    """
    numEntries = len(dataSet)
    # Tally how many rows carry each class label.
    labelCounts = {}
    for featVec in dataSet:
        currentLabel = featVec[-1]
        labelCounts[currentLabel] = labelCounts.get(currentLabel, 0) + 1
    shannonEnt = 0.0  # Shannon entropy accumulator
    for count in labelCounts.values():
        # float() keeps this a true division even under Python 2 semantics.
        prob = float(count) / numEntries
        shannonEnt -= prob * log(prob, 2)  # logarithm base 2
    return shannonEnt
def createDataSet():
    """Build a small hard-coded sample dataset and its feature names.

    Returns:
        A tuple ``(dataSet, labels)``. ``dataSet`` is a list of five rows,
        each holding two 0/1 feature values followed by a 'yes'/'no' class
        label. ``labels`` lists the names of the two feature columns.
        Fresh list objects are created on every call.
    """
    dataSet = [
        [1, 1, 'yes'],
        [1, 1, 'yes'],
        [1, 0, 'no'],
        [0, 1, 'no'],
        [0, 1, 'no'],
    ]
    labels = ['no surfacing', 'flippers']
    return dataSet, labels
def splitDataSet(dataSet,axis,value):
# python在函数中传递的是列表的引用,在函数内部对列表对象的修改,
# 将会影响该列表对象的整个生存周期。所以需要在函数的开始声明新的list
# Machine Learning in Action, chapter 3: scratch code
# (web page residue: "latest recommended article published 2021-12-21 16:36:40")