# -*- coding: utf-8 -*-
"""
Created on Thu Feb 1 13:33:50 2018
@author: Administrator
"""
import pandas as pd
from sklearn.linear_model import LogisticRegression as LR
# NOTE: RandomizedLogisticRegression was deprecated in scikit-learn 0.19 and
# removed in 0.21, so this script needs an older scikit-learn release.
from sklearn.linear_model import RandomizedLogisticRegression as RLR

# Script overview:
#   1. load the bank-loan spreadsheet,
#   2. let a randomized logistic regression pick the useful feature columns,
#   3. fit a plain logistic regression on the selected columns and report its
#      score on that same data.
path='F:/python/3python数据分析与挖掘实战/图书配套数据、代码/chapter5/demo/data/bankloan.xls'
df = pd.read_excel(path)

# The first 8 columns are the candidate features; the 9th column is the target.
features = df.iloc[:, :8]
# `.values` replaces `DataFrame.as_matrix()`, which was removed in pandas 1.0.
x = features.values
y = df.iloc[:, 8].values

rlr = RLR()  # randomized logistic regression, used only for feature selection
rlr.fit(x, y)
# Boolean mask with one entry per feature column (8 entries).
# rlr.scores_ holds the per-feature scores, if they are needed.
support = rlr.get_support()
print('通过随机逻辑回归模型筛选特征结束')

# Apply the mask to the 8 feature columns rather than to all 9 columns of
# `df`: a mask whose length differs from the indexed axis is rejected by
# current NumPy/pandas.
selected = features.columns[support]
print('有效特征为:%s' % ','.join(selected))
x = features[selected].values  # keep only the selected feature columns

lr = LR()  # logistic regression model
lr.fit(x, y)
print('逻辑回归模型训练结束')
# The score is computed on the data the model was fitted on, so it is a
# training-set accuracy, not a held-out estimate.
print('模型的平均正确率:%s' % lr.score(x, y))

# Predict the first sample; reshape(1, -1) turns the single row into the 2-D
# (1, n_features) array that predict() expects.
first_prediction = lr.predict(x[0].reshape(1, -1))
# ---------------------------------------------------------------------------
# Console transcript from a sample run, kept for reference. It is commented
# out so that the file remains valid Python.
# ---------------------------------------------------------------------------
# df
# Out[26]:
# 年龄 教育 工龄 地址 收入 负债率 信用卡负债 其他负债 违约
# 0 41 3 17 12 176 9.3 11.359392 5.008608 1
# 1 27 1 10 6 31 17.3 1.362202 4.000798 0
# 2 40 1 15 14 55 5.5 0.856075 2.168925 0
# 3 41 1 15 14 120 2.9 2.658720 0.821280 0
# 4 24 2 2 0 28 17.3 1.787436 3.056564 1
# 5 41 2 5 5 25 10.2 0.392700 2.157300 0
# 6 39 1 20 9 67 30.6 3.833874 16.668126 0
# 7 43 1 12 11 38 3.6 0.128592 1.239408 0
# 8 24 1 3 4 19 24.4 1.358348 3.277652 1
# 9 36 1 0 13 25 19.7 2.777700 2.147300 0
# 10 27 1 0 1 16 1.7 0.182512 0.089488 0
# 11 25 1 4 0 23 5.2 0.252356 0.943644 0
# 12 52 1 24 14 64 10.0 3.929600 2.470400 0
# 13 37 1 6 9 29 16.3 1.715901 3.011099 0
# 14 48 1 22 15 100 9.1 3.703700 5.396300 0
# 15 36 2 9 6 49 8.6 0.817516 3.396484 1
# 16 36 2 13 6 41 16.4 2.918216 3.805784 1
# 17 43 1 23 19 72 7.6 1.181952 4.290048 0
# 18 39 1 6 9 61 5.7 0.563274 2.913726 0
# 19 41 3 0 21 26 1.7 0.099008 0.342992 0
# 20 39 1 22 3 52 3.2 1.154816 0.509184 0
# 21 47 1 17 21 43 5.6 0.587552 1.820448 0
# 22 28 1 3 6 26 10.0 0.431600 2.168400 0
# 23 29 1 8 6 27 9.8 0.402192 2.243808 0
# 24 21 2 1 2 16 18.0 0.241920 2.638080 1
# 25 25 4 0 2 32 17.6 2.140160 3.491840 0
# 26 45 2 9 26 69 6.7 0.707319 3.915681 0
# 27 43 1 25 21 64 16.7 0.951232 9.736768 0
# 28 33 2 12 8 58 18.4 3.084208 7.587792 0
# 29 26 3 2 1 37 14.2 0.204906 5.049094 0
# .. .. .. .. .. ... ... ... ... ..
# 670 23 2 3 4 24 6.3 0.551880 0.960120 0
# 671 27 1 0 7 18 12.8 0.582912 1.721088 0
# 672 34 1 6 1 20 1.2 0.042480 0.197520 0
# 673 35 1 0 5 34 11.1 1.369962 2.404038 1
# 674 24 2 4 4 20 3.7 0.324120 0.415880 0
# 675 48 1 30 8 101 6.4 1.874560 4.589440 0
# 676 26 2 8 1 40 11.8 0.443680 4.276320 0
# 677 40 1 6 9 36 2.1 0.390852 0.365148 1
# 678 34 1 9 8 48 9.3 0.419616 4.044384 0
# 679 35 1 17 4 42 3.0 0.093240 1.166760 0
# 680 30 1 7 2 33 25.4 1.165098 7.216902 1
# 681 20 1 4 0 14 9.7 0.200984 1.157016 1
# 682 36 4 1 17 30 11.5 0.324300 3.125700 0
# 683 21 1 1 1 16 6.3 0.141120 0.866880 0
# 684 34 1 18 10 53 10.5 0.840315 4.724685 0
# 685 35 1 7 5 39 16.1 1.701609 4.577391 1
# 686 35 3 1 4 20 7.9 0.853200 0.726800 0
# 687 34 1 10 1 33 10.3 2.501664 0.897336 1
# 688 33 1 12 12 68 10.8 1.365984 5.978016 0
# 689 30 1 4 2 18 10.7 0.227268 1.698732 0
# 690 24 2 0 5 16 7.3 0.024528 1.143472 0
# 691 47 1 31 8 253 7.2 9.308376 8.907624 0
# 692 53 1 0 26 27 28.9 2.754459 5.048541 1
# 693 22 3 0 2 20 4.7 0.219020 0.720980 0
# 694 48 2 6 1 66 12.1 2.315940 5.670060 0
# 695 36 2 6 15 27 4.6 0.262062 0.979938 1
# 696 29 2 6 4 21 11.5 0.369495 2.045505 0
# 697 33 1 15 3 32 7.6 0.491264 1.940736 0
# 698 45 1 19 22 77 8.4 2.302608 4.165392 0
# 699 37 1 12 14 44 14.7 2.994684 3.473316 0
# [700 rows x 9 columns]
# runfile('C:/Users/Administrator/Desktop/87.py', wdir='C:/Users/Administrator/Desktop')
# 通过随机逻辑回归模型筛选特征结束
# 有效特征为:工龄,地址,负债率,信用卡负债
# 逻辑回归模型训练结束
# 模型的平均正确率:0.814285714286