def get_iv(df):
    """Return the Information Value (IV) of a binned feature.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per bin, with the columns ``"good%"``, ``"bad%"`` and
        ``"woe"``.

    Returns
    -------
    float
        The sum over all bins of ``(good% - bad%) * woe``.
    """
    # Per-bin difference between the two class shares.
    rate = df["good%"] - df["bad%"]
    # Weight each difference by the bin's WOE and total over all bins.
    iv = np.sum(rate * df["woe"])
    return iv
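# WOE (Weight of Evidence): an indicator of the default probability of a feature within a single bin.
# IV (Information Value): the amount of information the feature carries across all of its bins, i.e. the feature's contribution to the model.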
3.3.4 卡方检验,合并箱体,画出IV曲线
# Work on a copy so the bin-merging experiments below leave num_bins untouched.
num_bins_ = num_bins.copy()

import matplotlib.pyplot as plt
import scipy
import scipy.stats  # load the stats submodule explicitly; scipy.stats.* is used below
# Manual check on the first two bins before looping over all adjacent pairs.
# Each slice keeps everything from position 2 onward of a bin entry
# (presumably the per-class sample counts of that bin - TODO confirm against
# how num_bins was built).
x1=num_bins_[0][2:]
x1
# Echoed notebook output for x1:
(4243, 7543)
x2=num_bins_[1][2:]
x2
# Echoed notebook output for x2:
(3571, 5927)
# Element [0] of chi2_contingency's result is the chi-square statistic for the
# 2x2 table formed by the two bins.
scipy.stats.chi2_contingency([x1,x2])[0]
# Echoed notebook output (chi-square statistic):
5.705081033738888
# Number of bins currently held in num_bins_.
len(num_bins_)
# Echoed notebook output:
20
# Chi-square test on every pair of adjacent bins. A large p-value marks the
# pair as a candidate for merging.
pvs = []
for i in range(len(num_bins_) - 1):  # one iteration per adjacent pair (19 for 20 bins)
    x1 = num_bins_[i][2:]
    x2 = num_bins_[i + 1][2:]
    # chi2_contingency returns (chi2, p-value, dof, expected):
    # [0] is the chi-square statistic, [1] is the p-value.
    pv = scipy.stats.chi2_contingency([x1, x2])[1]
    pvs.append(pv)
# pd.cut assigns every value to a bin given the full list of bin boundaries:
# pd.cut(values, boundaries).
# Only the feature being demonstrated and the label are kept, as an independent copy.
data = model_data.loc[:, ["age", "SeriousDlqin2yrs"]].copy()
data = data.assign(
    cut=pd.cut(
        data["age"],
        bins=[-np.inf, 48.49986200790144, 58.757170160044694, 64.0, 74.0, np.inf],
    )
)
data
# Build a new frame in which every raw feature value is replaced by the WOE of
# the bin it falls into. The original index is kept.
model_woe = pd.DataFrame(index=model_data.index)

# Single-feature example: bin the raw values by the feature's boundaries, then
# map each bin to its WOE.
# NOTE: because "age" is assigned here first, it becomes the first column of
# model_woe regardless of the iteration order of bins_of_col.
model_woe["age"] = pd.cut(model_data["age"], bins_of_col["age"]).map(woeall["age"])

# Same transformation for every feature.
for col in bins_of_col:
    model_woe[col] = pd.cut(model_data[col], bins_of_col[col]).map(woeall[col])

# Append the label as the last column.
model_woe["SeriousDlqin2yrs"] = model_data["SeriousDlqin2yrs"]
model_woe.head()
age
RevolvingUtilizationOfUnsecuredLines
DebtRatio
MonthlyIncome
NumberOfOpenCreditLinesAndLoans
NumberOfTime30-59DaysPastDueNotWorse
NumberOfTimes90DaysLate
NumberRealEstateLoansOrLines
NumberOfTime60-89DaysPastDueNotWorse
NumberOfDependents
SeriousDlqin2yrs
0
-0.278138
2.205113
-0.01122
-0.238024
-0.055325
0.133757
0.088506
-0.146831
0.028093
0.202748
0
1
1.004098
0.665610
-0.01122
-0.238024
-0.055325
0.133757
0.088506
-0.146831
0.028093
0.202748
0
2
-0.278138
-2.040304
-0.01122
-0.238024
-0.055325
0.133757
0.088506
-0.146831
-1.779675
-0.477951
1
3
1.004098
2.205113
-0.47269
-0.238024
0.123566
0.133757
0.088506
0.620994
0.028093
0.202748
0
4
-0.278138
-1.073125
-0.01122
0.232036
0.123566
0.133757
0.088506
0.620994
0.028093
0.202748
1
3.5 建模与模型验证
# 3.5 Modelling and model validation
# Prepare the validation set: replace every raw feature value by the WOE of
# the bin it falls into, using the same boundaries and WOE tables as for the
# training data.
vali_woe = pd.DataFrame(index=vali_data.index)
for col in bins_of_col:
    vali_woe[col] = pd.cut(vali_data[col], bins_of_col[col]).map(woeall[col])

# Append the label as the last column.
vali_woe["SeriousDlqin2yrs"] = vali_data["SeriousDlqin2yrs"]
vali_woe.head()
RevolvingUtilizationOfUnsecuredLines
age
DebtRatio
MonthlyIncome
NumberOfOpenCreditLinesAndLoans
NumberOfTime30-59DaysPastDueNotWorse
NumberOfTimes90DaysLate
NumberRealEstateLoansOrLines
NumberOfTime60-89DaysPastDueNotWorse
NumberOfDependents
SeriousDlqin2yrs
0
2.205113
0.247606
1.521696
-0.238024
-0.055325
0.133757
0.088506
-0.146831
0.028093
0.202748
0
1
-1.073125
-0.278138
-0.011220
0.232036
0.123566
0.133757
0.088506
0.620994
0.028093
-0.477951
1
2
2.205113
1.004098
-0.011220
0.232036
-0.055325
0.133757
0.088506
-0.146831
0.028093
0.202748
0
3
2.205113
-0.278138
-0.011220
-0.238024
0.123566
0.133757
0.088506
-0.146831
0.028093
0.202748
0
4
-1.073125
-0.278138
-0.011220
-0.238024
0.123566
0.133757
0.088506
-0.146831
0.028093
0.202748
1
# Training set: every column except the last is a feature; the last is the label.
X = model_woe.iloc[:, :-1]
y = model_woe.iloc[:, -1]

# Validation set: select the features BY NAME in the training column order.
# model_woe and vali_woe do not share a column order ("age" is first in one
# and second in the other), so a positional slice would feed the model
# swapped features and trigger scikit-learn's feature-name-order warning
# (an error from version 1.2 on).
vali_X = vali_woe[X.columns]
vali_y = vali_woe.iloc[:, -1]

from sklearn.linear_model import LogisticRegression as LR

lr = LR().fit(X, y)
lr.score(vali_X, vali_y)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
0.7587824255767206
# Validation accuracy as a function of the regularisation parameter C.
score = []
for i in c_2:
    lr = LR(solver='liblinear', C=i).fit(X, y)
    # Index the validation features by the training column names so that they
    # are passed in the same order as during fit.
    score.append(lr.score(vali_X[X.columns], vali_y))
plt.figure()
plt.plot(c_2, score)
plt.show()
# NOTE: the FutureWarning scikit-learn emits for this cell is about feature
# names not being in the same order as during fit (not about file names);
# from version 1.2 on it is raised as an error.
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
# Number of iterations the solver actually ran for the most recently fitted
# model (the notebook echo of the value follows).
lr.n_iter_
array([5], dtype=int32)
# Validation accuracy as a function of the iteration cap, with C fixed at 0.025.
# Small caps are expected to produce liblinear ConvergenceWarnings.
score = []
for i in [1, 2, 3, 4, 5, 6]:
    lr = LR(solver='liblinear', C=0.025, max_iter=i).fit(X, y)
    # Index the validation features by the training column names so that they
    # are passed in the same order as during fit.
    score.append(lr.score(vali_X[X.columns], vali_y))
plt.figure()
plt.plot([1, 2, 3, 4, 5, 6], score)
plt.show()
D:\py1.1\lib\site-packages\sklearn\svm\_base.py:1225: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
warnings.warn(
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\svm\_base.py:1225: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
warnings.warn(
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\svm\_base.py:1225: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
warnings.warn(
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\svm\_base.py:1225: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
warnings.warn(
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\svm\_base.py:1225: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
warnings.warn(
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
D:\py1.1\lib\site-packages\sklearn\base.py:493: FutureWarning: The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names must be in the same order as they were in fit.
warnings.warn(message, FutureWarning)
<AxesSubplot:title={'center':'ROC Curves'}, xlabel='False Positive Rate', ylabel='True Positive Rate'>