from math import log
def calcShannonEnt(dataSet):
    """Return the Shannon entropy (base 2) of the class labels in ``dataSet``.

    Args:
        dataSet: a sequence of records; the last element of each record
            (``featVec[-1]``) is taken as that record's class label.

    Returns:
        The entropy as a float. An empty ``dataSet`` yields ``0.0`` because
        there are no labels to accumulate over.
    """
    numEntries = len(dataSet)

    # Tally how many records carry each class label.
    labelCounts = {}
    for featVec in dataSet:
        currentLabel = featVec[-1]
        labelCounts[currentLabel] = labelCounts.get(currentLabel, 0) + 1

    # H = -sum(p * log2(p)) over the observed labels.
    shannonEnt = 0.0
    for count in labelCounts.values():
        # float() keeps this a true division even under Python 2.
        prob = float(count) / numEntries
        shannonEnt -= prob * log(prob, 2)
    return shannonEnt
def createDataSet():
    """Create the sample data set.

    Returns:
        A ``(dataSet, labels)`` pair. ``dataSet`` is a list of five records,
        each holding two 0/1 feature values followed by a ``'yes'``/``'no'``
        class label; ``labels`` holds the two feature names.
    """
    dataSet = [
        [1, 1, 'yes'],
        [1, 1, 'yes'],
        [1, 0, 'no'],
        [0, 1, 'no'],
        [0, 1, 'no'],
    ]
    labels = ['no surfacing', 'flippers']
    return dataSet, labels
def createLabels():
    """Create the list of feature (attribute) names.

    Returns:
        A new list ``['no surfacing', 'flippers']`` on every call.
    """
    labels = ['no surfacing', 'flippers']
    return labels
def splitDataSet(dataSet,axis,value): //将原特征集按照第axis属性 划分成子属性集 输入参数: 数据集合 特征属性下标axis 第axis属性特征值
def calcShannonEnt(dataSet):
    """Return the Shannon entropy (base 2) of the class labels in ``dataSet``.

    Args:
        dataSet: a sequence of records; the last element of each record
            (``featVec[-1]``) is taken as that record's class label.

    Returns:
        The entropy as a float. An empty ``dataSet`` yields ``0.0`` because
        there are no labels to accumulate over.
    """
    # NOTE(review): an identical definition of this function appears earlier
    # in this file; this later one rebinds the name. Consider removing one.
    numEntries = len(dataSet)

    # Tally how many records carry each class label.
    labelCounts = {}
    for featVec in dataSet:
        currentLabel = featVec[-1]
        labelCounts[currentLabel] = labelCounts.get(currentLabel, 0) + 1

    # H = -sum(p * log2(p)) over the observed labels.
    shannonEnt = 0.0
    for count in labelCounts.values():
        # float() keeps this a true division even under Python 2.
        prob = float(count) / numEntries
        shannonEnt -= prob * log(prob, 2)
    return shannonEnt
def createDataSet():
    """Create the sample data set.

    Returns:
        A ``(dataSet, labels)`` pair. ``dataSet`` is a list of five records,
        each holding two 0/1 feature values followed by a ``'yes'``/``'no'``
        class label; ``labels`` holds the two feature names.
    """
    # NOTE(review): an identical definition of this function appears earlier
    # in this file; this later one rebinds the name. Consider removing one.
    dataSet = [
        [1, 1, 'yes'],
        [1, 1, 'yes'],
        [1, 0, 'no'],
        [0, 1, 'no'],
        [0, 1, 'no'],
    ]
    labels = ['no surfacing', 'flippers']
    return dataSet, labels
def createLabels():
    """Create the list of feature (attribute) names.

    Returns:
        A new list ``['no surfacing', 'flippers']`` on every call.
    """
    # NOTE(review): an identical definition of this function appears earlier
    # in this file; this later one rebinds the name. Consider removing one.
    labels = ['no surfacing', 'flippers']
    return labels
def splitDataSet(dataSet,axis,value): //将原特征集按照第axis属性 划分成子属性集 输入参数: 数据集合 特征属性下标axis 第axis属性特征值