from numpy import *
from math import log
import operator
def calcShannonEnt(dataset):
    """Return the Shannon entropy (in bits) of the class labels in *dataset*.

    Args:
        dataset: A sequence of rows; the last element of each row is taken
            as that row's class label.

    Returns:
        The entropy ``-sum(p * log2(p))`` over the observed label
        frequencies, as a float. An empty dataset yields ``0.0`` because
        there are no labels to sum over.
    """
    numdataset = len(dataset)

    # Tally how many rows carry each class label.
    labelCount = {}
    for data in dataset:
        curlabel = data[-1]
        labelCount[curlabel] = labelCount.get(curlabel, 0) + 1

    # Accumulate -p * log2(p) for every distinct label.
    shannonEnt = 0.0
    for count in labelCount.values():
        prob = float(count) / numdataset
        shannonEnt -= prob * log(prob, 2)
    return shannonEnt
def creatDataSet():
dataset=[['青年','否','否','一般','否'],
['青年','否','否','好','否'],
['青年','是','否','好','是'],
['青年','是','是','一般','是'],
['青年','否','否','一般','否'],
['中年','否','否','一般',
李航《统计学习方法》例5.3：利用ID3算法建立决策树
最新推荐文章于 2024-04-20 22:52:28 发布