代码依赖于上一篇文章代码中的例子,链接为《统计学习方法》第5章Python3实现(一) 熵、条件熵、信息增益、信息增益比
计算基尼指数的代码如下:
"""
CART: Classification And Regression Tree
Created on Dec 28th,2018
@author:Aomo Jan
"""
def calcGini(dataSet, yIndex=-1):
    """Compute the Gini index of a data set.

    The Gini index is ``sum(p_k * (1 - p_k))`` over every distinct class
    label ``k``, where ``p_k`` is the fraction of rows carrying that label.

    Args:
        dataSet: Sequence of rows; each row is an indexable record.
        yIndex: Position of the class label inside each row (defaults to
            the last column).

    Returns:
        The Gini index as a number in ``[0, 1)``; ``0.0`` for an empty
        data set or a data set with a single class.
    """
    numEntries = len(dataSet)
    if numEntries == 0:
        # No rows means no impurity; also avoids dividing by zero below.
        return 0.0

    # Count every label in a single pass instead of rescanning the whole
    # data set once per distinct class.
    classCounts = {}
    for dt in dataSet:
        label = dt[yIndex]
        classCounts[label] = classCounts.get(label, 0) + 1

    total = float(numEntries)
    giniP = 0.0
    for count in classCounts.values():
        pk = count / total
        giniP += pk * (1 - pk)
    return giniP
def calcGiniDA(dataSet, xIndex, attrX, yIndex=-1):
    """Compute the Gini index of a data set under a binary split on one feature.

    The rows are partitioned into those whose feature ``xIndex`` equals
    ``attrX`` and those where it does not; the result is the size-weighted
    average of the Gini index of the two partitions.

    Args:
        dataSet: Sequence of rows; each row is an indexable record.
        xIndex: Position of the feature to split on inside each row.
        attrX: Feature value that defines the first partition.
        yIndex: Position of the class label inside each row (defaults to
            the last column).

    Returns:
        The weighted Gini index of the split; ``0.0`` for an empty data set.
    """
    numEntries = len(dataSet)
    if numEntries == 0:
        # Nothing to split; avoid dividing by zero below.
        return 0.0

    d1 = [dt for dt in dataSet if dt[xIndex] == attrX]
    d2 = [dt for dt in dataSet if dt[xIndex] != attrX]

    # Forward yIndex so a non-default label column is honoured for both
    # partitions (previously the default column was always used).
    weighted = len(d1) * calcGini(d1, yIndex) + len(d2) * calcGini(d2, yIndex)
    return weighted / float(numEntries)
def testDemo():
    """Print the Gini index of the sample data set and one conditional Gini index.

    The sample data is obtained from ``loadDataSet`` in the ``entropy``
    module, which must be importable when this function runs.
    """
    # Imported locally so that merely importing this module does not
    # require the ``entropy`` module to be present.
    from entropy import loadDataSet

    dataSet = loadDataSet()
    giniP = calcGini(dataSet)
    print('Gini=%f' % (giniP))

    dataSet = loadDataSet()
    # Column index 3 is the fourth feature, so the label reads A4 (not A2).
    gnA1 = calcGiniDA(dataSet, 3, '非常好')
    print('Gini(D,A4=1)=%f' % (gnA1))
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    testDemo()
欢迎关注我们: