# Information entropy (信息熵)
# Code taken from the book "Machine Learning" (《机器学习》)
from collections import Counter
from math import log, log2

import numpy as np
def calcShannonEnt(dataset):
    """Return the Shannon entropy, in bits, of the labels in ``dataset``.

    The label of each record is its last element (``record[-1]``); every
    other element of the record is ignored.

    Args:
        dataset: Iterable of indexable records, e.g. a list of lists or a
            2-D NumPy array. The labels must be hashable.

    Returns:
        float: ``-sum(p * log2(p))`` taken over the distinct labels, where
        ``p`` is the fraction of records carrying that label. An empty
        dataset yields ``0.0``.
    """
    # Counter replaces the manual "initialise to 0, then increment" tally.
    labelCounts = Counter(record[-1] for record in dataset)
    # Derive the total from the tally so that one-shot iterables, which
    # have no len(), are supported as well as sequences.
    numEntries = sum(labelCounts.values())

    entropy = 0.0
    for count in labelCounts.values():
        prob = count / numEntries
        # log2 is more accurate than log(x, 2) for base-2 logarithms.
        entropy -= prob * log2(prob)
    return entropy
if __name__ == '__main__':
    # Demo: five two-column records; the last column is the label
    # (three records labelled 1, two labelled 2).
    samples = np.array([[1, 1], [1, 1], [1, 2], [1, 2], [1, 1]])
    print(calcShannonEnt(samples))