决策树 C4.5

相对 ID3,C4.5 改为以信息增益率(gain ratio = infoGain / IV)而不是信息增益作为选择划分特征的标准:
# Excerpt: gain-ratio computation for feature i. The `continue` below implies
# an enclosing loop that is not shown here (presumably over features; verify).
for value in uniqueVals:     # compute the information entropy for each way of splitting
    subdataset=splitdataset(dataset,i,value)
    p=len(subdataset)/float(len(dataset))  # fraction of rows that fall into this subset
    newEnt+=p*jisuanEnt(subdataset)  # accumulate the subset's entropy weighted by p
    IV=IV-p*log(p,2)  # accumulate -p*log2(p), the divisor used for the ratio below
infoGain=baseEnt-newEnt
if (IV == 0): # IV is the divisor below; skip this feature to avoid dividing by zero
    continue
infoGain_ratio = infoGain / IV                   # infoGain_ratio of this feature

或者

 for value in uniqueVals:
            subDataSet = splitDataSet(dataSet, i, value)
            prob = len(subDataSet) / float(len(dataSet))
            newEntropy += prob *calcShannonEntOfFeature(subDataSet, -1)    #calc conditional entropy
 infoGain = baseEntropy - newEntropy
 iv = calcShannonEntOfFeature(dataSet, i)
 if(iv == 0):    #value of the feature is all same,infoGain and iv all equal 0, skip the feature
    continue
  infoGainRate = infoGain / iv
#calc shannon entropy of label or feature
def calcShannonEntOfFeature(dataSet, feat):
    """Return the base-2 Shannon entropy of one column of ``dataSet``.

    Args:
        dataSet: Sequence of rows; each row must be indexable by ``feat``.
        feat: Index of the column to measure (a negative index such as -1
            selects from the end of each row).

    Returns:
        The entropy as a float; 0.0 when ``dataSet`` is empty or when every
        row holds the same value in that column.
    """
    total = len(dataSet)

    # Tally how many rows carry each distinct value of the chosen column.
    tally = {}
    for row in dataSet:
        value = row[feat]
        tally[value] = tally.get(value, 0) + 1

    # Subtract p * log2(p) for every distinct value, in first-seen order.
    entropy = 0.0
    for count in tally.values():
        fraction = float(count) / total
        entropy -= fraction * log(fraction, 2)
    return entropy
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值