# 机器学习评价方法之NRIG

by wangben 2015.11

python测试代码如下：

#!/usr/bin/python
#by wangben@58.com 20151020 NRIG SCORE,big is better,infomation gain is bigger
import sys
import math

#
def entropy(p):
    """Return the Shannon entropy (in bits) of a Bernoulli variable with P(1) = p.

    H(p) = -(p*log2(p) + (1-p)*log2(1-p)).
    At the boundary p == 0 or p == 1 the limit p*log2(p) -> 0 applies,
    so the entropy is 0.0 (the original raised a math domain error there).
    """
    if p <= 0.0 or p >= 1.0:
        return 0.0  # degenerate distribution carries no information
    return -(p * math.log(p, 2) + (1 - p) * math.log(1 - p, 2))
def crossEntropy(label, p):
    """Return the binary cross entropy (in bits) between a label and a prediction.

    CE = -(label*log2(p) + (1-label)*log2(1-p)), with label expected in {0, 1}
    and p a predicted probability. p is clamped to [eps, 1-eps] so that a
    prediction of exactly 0.0 or 1.0 no longer raises a math domain error
    (a single hard score in the input file used to crash the whole script).
    """
    eps = 1e-15
    p = min(max(p, eps), 1.0 - eps)  # keep log() in its domain
    return -(label * math.log(p, 2) + (1 - label) * math.log(1 - p, 2))

if __name__ == "__main__":
    # Input format: one "label\tpredictScore" pair per line (whitespace separated).
    # Computes empirical CTR, entropy, cross entropy, RIG and NRIG.
    list_score = []   # (label, prediction) pairs kept for the second (NRIG) pass
    pos_sum = 0.0     # sum of positive labels (renamed: original shadowed builtin `sum`)
    test_size = 0.0   # number of valid samples
    ce_sum = 0.0      # accumulated cross entropy over ALL samples
    pred_sum = 0.0    # accumulated predictions over ALL samples

    with open(sys.argv[1]) as fin:  # close the file deterministically
        for line in fin:
            list_line = line.rstrip().split()
            if len(list_line) != 2:
                continue  # skip malformed lines
            label = float(list_line[0])
            value = float(list_line[1])
            test_size += 1.0
            if label > 0:
                pos_sum += label
            # NOTE(review): source indentation was flattened; cross entropy and
            # the prediction average must run over every sample (not just the
            # positives) for RIG/NRIG to be well defined.
            ce_sum += crossEntropy(label, value)
            pred_sum += value
            list_score.append([label, value])

    if test_size == 0:
        sys.exit("no valid input lines")  # avoid ZeroDivisionError on empty input

    avg_p = pos_sum / test_size
    print("empirical CTR(avg CTR): ", pos_sum, test_size, avg_p)
    Hp = entropy(avg_p)
    print("entropy: ", Hp)

    ce = ce_sum / test_size
    print("cross entropy: ", ce_sum, test_size, ce)
    print("RIG: ", (Hp - ce) / Hp)

    # NRIG: rescale all predictions so their mean matches the empirical CTR,
    # then recompute the relative information gain on the calibrated scores.
    avg_pre = pred_sum / test_size
    print("average prediction: ", avg_pre)
    diff_rate = avg_p / avg_pre
    n_ce = 0.0
    for label, value in list_score:
        n_ce += crossEntropy(label, value * diff_rate)
    n_ce /= test_size
    print("normalized ce: ", n_ce)
    print("NRIG: ", (Hp - n_ce) / Hp)


reference: (none listed in the original post)