from numpy import *
from math import log
# 计算数据集的熵
def calcShannonEnt(dataset):
    """Return the Shannon entropy (in bits) of the class labels in *dataset*.

    Each record in ``dataset`` is an indexable sequence whose last element
    (``record[-1]``) is treated as the class label.  The entropy is

        H = -sum(p_k * log2(p_k))

    where ``p_k`` is the fraction of records carrying label ``k``.

    Args:
        dataset: A sized iterable of records (for example a list of lists).

    Returns:
        The entropy as a float.  An empty dataset yields ``0.0`` because
        there are no labels to sum over.
    """
    total = len(dataset)

    # Tally how many records carry each class label.
    label_counts = {}
    for record in dataset:
        label = record[-1]
        label_counts[label] = label_counts.get(label, 0) + 1

    # Accumulate -p * log2(p) over the observed labels.  Every count is at
    # least 1, so p > 0 and the logarithm is always defined.
    entropy = 0.0
    for count in label_counts.values():
        prob = float(count) / total
        entropy -= prob * log(prob, 2)
    return entropy
# 加载数据
def creatDataSet():
dataset=[['青年','否','否','一般','否'],
['青年','否','否','好','否'],
['青年','是','否','好','是'],
['青年','是','是','一般','是'