from math import log
import operator


def calcShannonEnt(dataSet):
    """Return the Shannon entropy, in bits, of the class labels in dataSet.

    The class label of every record is taken to be its last element.

    Args:
        dataSet: A sized iterable of records (indexable sequences); the
            final item of each record is used as its class label.

    Returns:
        The entropy ``-sum(p * log2(p))`` over the label frequencies, as a
        float. An empty data set has no labels and yields 0.0.
    """
    numEntries = len(dataSet)  # number of records

    # Count how many records carry each class label.
    labelCounts = {}
    for featVec in dataSet:
        currentLabel = featVec[-1]  # last item of the row is the class
        labelCounts[currentLabel] = labelCounts.get(currentLabel, 0) + 1

    # Accumulate each class's contribution to the entropy.
    shannonEnt = 0.0
    for count in labelCounts.values():
        # float() keeps true division even under Python 2 semantics.
        prob = float(count) / numEntries
        shannonEnt -= prob * log(prob, 2)
    return shannonEnt


# NOTE(review): the original text fused the header of the next definition,
# `def createDataSet1():` ("create the sample data"), onto the end of this
# function's return line. That definition is truncated in the visible source,
# so its header must be restored when the rest of the file is reformatted.
dataSet = [['晴天','高温','中湿','无风','不宜'],
['晴天','高温','中湿','有风','不宜'],
['多云','高温','低湿','无风','适宜'],
['雨天','低温','高湿','无风','适宜'],
['雨天','低温','低湿','无风','适宜'],
[&#