GitHub: https://github.com/RealEmperor/Python-for-Data-Analysis
逻辑回归
案例1:银行贷款违约分析
参数初始化
import pandas as pd
# 参数初始化
filename = 'data/bankloan.xls'
data = pd.read_excel(filename)
print(data.head())
年龄 教育 工龄 地址 收入 负债率 信用卡负债 其他负债 违约
0 41 3 17 12 176 9.3 11.359392 5.008608 1
1 27 1 10 6 31 17.3 1.362202 4.000798 0
2 40 1 15 14 55 5.5 0.856075 2.168925 0
3 41 1 15 14 120 2.9 2.658720 0.821280 0
4 24 2 2 0 28 17.3 1.787436 3.056564 1
x = data.iloc[:, :8].as_matrix()
print(x)
[[ 41. 3. 17. ..., 9.3 11.359392 5.008608]
[ 27. 1. 10. ..., 17.3 1.362202 4.000798]
[ 40. 1. 15. ..., 5.5 0.856075 2.168925]
...,
[ 33. 1. 15. ..., 7.6 0.491264 1.940736]
[ 45. 1. 19. ..., 8.4 2.302608 4.165392]
[ 37. 1. 12. ..., 14.7 2.994684 3.473316]]
y = data.iloc[:, 8].as_matrix()
print(y)
[1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
0 1 0 0 0 0 0 1 0 0 0 1 0 1 0 1 0 0 1 0 0 0 0 0 1 0 0 0 1 1 0 1 1 0 0 0 0
0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 0 1 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0
0 0 0 0 0 0 0 1 1 0 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0
0 0 0 1 0 0 0 0 0 1 1 0 0 0 1 0 1 1 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0
0 1 0 0 0 1 0 1 1 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 1 0 0
0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 1 1 0 1 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0
0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 1 0 1 1 1 0 0 0 0 0 1 1
0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 1 0 0 0 0 0
0 0 0 1 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1 0 1 1 0 0 0 0 1 0 0 0 1 0 0 0
0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1
1 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 1 1 0 0 0 0 1 0 0 0 0
1 0 0 1 0 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 1 1 1 1 0 0 0
0 0 0 1 0 0 0 0 0 1 1 0 0 1 0 1 0 0 1 1 0 1 0 0 0 0 0 0 1 1 0 0 0 0 1 1 1
0 0 1 0 1 1 0 1 0 0 0 0 1 0 1 0 0 0 1 0 1 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0
0 0 0 1 1 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0
0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 1 0 0 0 1 0 1 0 0 0 0 1 0 0 1 0 0 0 0]
建立随机逻辑回归模型,筛选变量
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR
rlr = RLR() # 建立随机逻辑回归模型,筛选变量
rlr.fit(x,<