Q:
(选做) 自己编写求Gini指数的函数,并与纸质作业比较,验证函数正确性。
PS:以下是最简单的求解函数,仅用于方便计算简单的题目。
def gini_index(x, y, i):
    """Return the weighted Gini index of splitting the samples on feature ``i``.

    The samples are grouped by the value found in column ``i``. For each
    group the Gini impurity ``1 - sum(p_k ** 2)`` over the group's label
    proportions is computed, and the impurities are combined using
    ``group size / total samples`` as weights.

    Args:
        x: Sequence of samples, each an indexable row of feature values.
        y: Sequence of class labels, one per row of ``x``.
        i: Index of the column of ``x`` to split on.

    Returns:
        The weighted Gini index as a float; ``0.0`` when there are no samples.

    Raises:
        ValueError: If ``x`` and ``y`` differ in length.
    """
    from collections import Counter, defaultdict

    if len(x) != len(y):
        raise ValueError("x and y must have the same length")
    total = len(y)
    if total == 0:
        # Nothing to split; avoids dividing by zero below.
        return 0.0

    # Collect the labels of every sample sharing the same feature value.
    # Only values that actually occur create a group, so no group is empty
    # and the per-group division is always safe.
    labels_by_value = defaultdict(list)
    for row, label in zip(x, y):
        labels_by_value[row[i]].append(label)

    weighted = 0.0
    for labels in labels_by_value.values():
        size = len(labels)
        impurity = 1 - sum(
            (count / size) ** 2 for count in Counter(labels).values()
        )
        weighted += size / total * impurity
    return weighted
- 应用:
# Two 0/1 attribute columns, one entry per sample.
a1 = [1,1,1,1,1,0,0,0,1,1]
a2 = [0,1,1,0,1,0,0,0,1,0]
# Pair the columns up into one [a1, a2] feature row per sample.
X = [[first, second] for first, second in zip(a1, a2)]
# Class labels
Y = [1,1,1,0,1,0,0,0,0,0]
# Print the Gini index obtained by splitting on each attribute in turn.
for col in range(2):
    print('第%d个属性的基尼指数:%f' % (col, gini_index(X, Y, col)))
运行结果:
第0个属性的基尼指数:0.342857
第1个属性的基尼指数:0.316667