import numpy as np
import pandas as pd
# Toy dataset: two discrete feature columns and a binary label column.
data = {
    "feature1": [1, 1, 2, 1, 3, 2, 3],
    "feature2": [1, 3, 2, 1, 2, 2, 3],
    "label": [0, 0, 0, 1, 1, 0, 1],
}
df = pd.DataFrame(data)
def calcute_iv(df,aim,res,py_i_all,pn_i_all):
    """Compute the Information Value (IV) of one feature and store it in ``res``.

    For every distinct value ``i`` of the feature, the add-one-smoothed counts
    ``py_i`` (rows whose label equals ``aim``) and ``pn_i`` (all other rows)
    are converted to shares of the supplied totals, then combined as::

        WOE_i = ln((py_i / py_i_all) / (pn_i / pn_i_all))
        IV    = sum_i (py_i / py_i_all - pn_i / pn_i_all) * WOE_i

    Args:
        df: DataFrame with exactly two columns, ordered (feature, label).
        aim: Label value of interest (e.g. 1 for churn).
        res: Dict updated in place; ``res[<feature name>]`` receives the IV.
        py_i_all: Denominator for the ``aim`` share (the caller in this file
            passes the overall count of ``aim`` rows plus one).
        pn_i_all: Denominator for the non-``aim`` share (the caller in this
            file passes the overall count of the other rows plus one).

    Returns:
        None. The result is written into ``res``.

    Raises:
        ValueError: If ``df`` does not have exactly two columns.
    """
    feature, label = df.columns
    iv = 0
    for _, group in df.groupby(df[feature]):
        # Add-one smoothing keeps both counts positive, so the log is finite.
        py_i = (group[label] == aim).sum() + 1
        pn_i = (group[label] != aim).sum() + 1
        py_rate = py_i / py_i_all
        pn_rate = pn_i / pn_i_all
        woe_i = np.log(py_rate / pn_rate)
        # IV weights each WOE by the difference of the two shares; summing
        # the bare WOE values (as before) is not the Information Value.
        iv += (py_rate - pn_rate) * woe_i
    res[feature] = iv
def fun_iv(train_data,label,aim):
    """Run ``calcute_iv`` on every non-label column of ``train_data``.

    No binning is performed here, so each feature column is expected to be
    discrete already.

    Args:
        train_data: DataFrame holding the feature columns and the label column.
        label: Name of the label column inside ``train_data``.
        aim: Label value of interest (e.g. 1 for churn).

    Returns:
        Dict mapping each feature column name to the value that
        ``calcute_iv`` stored for it.
    """
    label_values = train_data[label]
    # Overall add-one-smoothed counts, shared by every feature.
    py_i_all = (label_values == aim).sum() + 1
    pn_i_all = (label_values != aim).sum() + 1

    feature_names = list(train_data.columns)
    feature_names.remove(label)

    res = {}
    for name in feature_names:
        pair = train_data[[name, label]]
        calcute_iv(pair, aim, res, py_i_all, pn_i_all)
    return res
# Score every feature of the toy dataset, treating label value 1 as the target.
res = fun_iv(train_data=df, label='label', aim=1)
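# Output recorded in the original post when displaying `res`:
# {'feature1': -0.4054651081081644, 'feature2': -0.40546510810816444}
# NOTE(review): the listing above does not appear to reproduce these numbers; re-run to confirm.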
iv计算code
最新推荐文章于 2022-09-09 09:59:13 发布