1. 代码
基于 Jupyter Notebook
#导包
import numpy as np
import math
import pandas as pd
from sklearn.utils.multiclass import type_of_target
from scipy import stats
#求woe值和iv值
def woe(X, y, event):
    """Compute the WOE mapping and IV value for every feature of ``X``.

    ``X`` is first passed through ``feature_discretion`` (defined elsewhere
    in this file; per the original comment it handles continuous features).
    Each column of the result is then handed to ``woe_single_x`` and the
    per-feature results are collected in column order.

    Args:
        X: Feature data accepted by ``feature_discretion``. The processed
            result is indexed as ``X1[:, i]``, so it is presumably a 2-D
            NumPy array laid out as (samples, features) -- TODO confirm.
        y: Target values, forwarded unchanged to ``woe_single_x``.
        event: Forwarded unchanged to ``woe_single_x``; presumably the
            value of ``y`` that counts as the event class -- verify
            against ``count_binary``.

    Returns:
        A tuple ``(woe, iv)`` of NumPy arrays. ``woe`` is built from the
        first value ``woe_single_x`` returns for each feature (a WOE dict,
        per the original comment) and ``iv`` from the second.
    """
    res_woe = []  # one WOE dict per feature
    res_iv = []  # one IV value per feature
    X1 = feature_discretion(X)  # pre-process continuous features
    for i in range(X1.shape[-1]):  # walk the features along the last axis
        x = X1[:, i]  # values of a single feature
        woe_dict, iv1 = woe_single_x(x, y, event)
        res_woe.append(woe_dict)
        res_iv.append(iv1)
    return np.array(res_woe), np.array(res_iv)
#求单个特征的woe值
def woe_single_x(x, y, event):
event_total, non_event_total = count_binary(y, event) #计算好人坏人总数
x_labels = np.unique(x) #特征中的分段
woe_dict = {} #存放每个分段的名称 以及 其对应的woe值
iv = 0
for x1 in x_labels: #遍历每个分段
y1 &