运用python进行信用卡客户违约预测
数据预处理
In [1]:
#路径设置
import os
os.chdir("E:\PYTHON数据分析20200924")
In [2]:
#数据读取
import pandas as pd
data=pd.read_csv('creditcard0.csv')
data=data.drop(['Unnamed: 0'],axis=1)
data.head()
Out[2]: data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class
5 rows × 29 columns
In [3]:
#缺失值处 ?
data.isnull().any()
data.isnull().sum()
print(data.isnull().any())
print(data.isnull().sum())
V1 False
V2 True
V3 False
V4 False
V5 False
V6 False
V7 False
V8 False
V9 False
V10 False
V11 True
V12 False
V13 False
V14 False
V15 False
V16 False
V17 False
V18 False
V19 False
V20 False
V21 True
V22 False
V23 False
V24 False
V25 False
V26 False
V27 False
V28 False
Class False
dtype: bool
V1 0
V2 12
V3 0
V4 0
V5 0
V6 0
V7 0
V8 0
V9 0
V10 0
V11 14
V12 0
V13 0
V14 0
V15 0
V16 0
V17 0
V18 0
V19 0
V20 0
V21 10
V22 0
V23 0
V24 0
V25 0
V26 0
V27 0
V28 0
Class 0
dtype: int64
In [4]:
#缺失值填 ?
data['V2']=data['V2'].fillna(data['V2'].mean())
data['V11']=data['V11'].fillna(data['V11'].mean())
data['V21']=data['V21'].fillna(data['V21'].mean())
#判断缺失值是否为0
print('数据现有缺失值个数为 ?')
print(data.isnull().sum().sum())
数据现有缺失值个数为 ?
0
In [5]:
#描述性统计分 ?
import matplotlib.pyplot as plt
print('data.describe:')
print(data.describe())
data.describe:
V1 V2 V3 V4 V5 V6 \
count 992.000000 992.000000 992.000000 992.000000 992.000000 992.000000
mean -2.362398 1.740766 -3.513749 2.227269 -1.586542 -0.655236
std 5.595568 3.805145 6.205772 3.231054 4.185663 1.776190
min -35.698345 -33.049877 -31.103685 -4.125352 -22.105532 -6.406267
25% -2.740788 -0.132073 -5.120349 -0.111571 -1.827729 -1.551056
50% -0.753172 0.958155 -1.369045 1.287041 -0.422765 -0.646884
75% 1.060972 2.716910 0.326804 4.250632 0.459740 0.097086
max 2.321264 22.057729 3.745841 12.114672 11.095089 6.474115
V7 V8 V9 V10 ... V20 \
count 992.000000 992.000000 992.000000 992.000000 ... 992.000000
mean -2.725728 0.268681 -1.269198 -2.840494 ... 0.193873
std 5.865285 4.861556 2.332071 4.506409 ... 1.305163
min -43.557242 -41.044261 -13.434066 -24.588262 ... -20.564506
25% -3.058707 -0.203540 -2.298358 -4.551515 ... -0.184205
50% -0.607251 0.139053 -0.670768 -0.882089 ... 0.034889
75% 0.256376 0.877002 0.178203 -0.034903 ... 0.406807
max 10.949696 20.007208 7.721003 8.440031 ... 13.657647
V21 V22 V23 V24 V25 V26 \
count 992.000000 992.000000 992.000000 992.000000 992.000000 992.000000
mean 0.330166 -0.007529 -0.036201 -0.040642 -0.004678 0.016186
std 2.796086 1.176176 1.245022 0.577243 0.673627 0.472193
min -22.797604 -8.887017 -19.254328 -2.028024 -4.781606 -1.152671
25% -0.180641 -0.541855 -0.232125 -0.395566 -0.354275 -0.279250
50% 0.139319 -0.024087 -0.020687 0.003091 0.030655 -0.020398
75% 0.653223 0.594705 0.192766 0.379027 0.382238 0.322613
max 27.202839 8.361985 7.452410 3.278158 2.208209 2.745261
V27 V28 Class
count 992.000000 992.000000 992.000000
mean 0.076917 0.051671 0.495968
std 1.013575 0.641466 0.500236
min -7.263482 -1.869290 0.000000
25% -0.064133 -0.052789 0.000000
50% 0.045379 0.030013 0.000000
75% 0.436937 0.218773 1.000000
max 3.052358 15.415925 1.000000
[8 rows x 29 columns]
In [6]:
print('data.std()')
print(data.std())
data.std()
V1 5.595568
V2 3.805145
V3 6.205772
V4 3.231054
V5 4.185663
V6 1.776190
V7 5.865285
V8 4.861556
V9 2.332071
V10 4.506409
V11 2.745349
V12 4.561692
V13 1.064155
V14 4.670392
V15 0.986559
V16 3.484126
V17 5.973109
V18 2.403695
V19 1.276793
V20 1.305163
V21 2.796086
V22 1.176176
V23 1.245022
V24 0.577243
V25 0.673627
V26 0.472193
V27 1.013575
V28 0.641466
Class 0.500236
dtype: float64
In [7]:
count_classes=pd.value_counts(data["Class"],sort=True).sort_index()
count_classes.plot(kind="bar")
plt.title("frequency of class")
plt.xlabel("class")
plt.ylabel("frequency")
Out[7]:
Text(0, 0.5, 'frequency')
In [8]:
data.hist()
plt.show()
模型建立¶
In [9]:
data.groupby(data.Class).mean()
Out[9]: data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
e data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
Class
2 rows × 28 columns
In [11]:
#数据标准 ?
from sklearn import preprocessing
datax=data.iloc[:,0:28]
data_scale = preprocessing.scale(datax,axis=0, with_mean=True, with_std=True, copy=True)
data_scale=pd.DataFrame(data_scale)
In [12]:
data_scale.head()
Out[12]: data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">0123456789...18192021222324252627
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
0123456789...18192021222324252627
5 rows × 28 columns
In [13]:
#正则 ?
data_normalized = preprocessing.normalize(data_scale, norm='l2')
data_normalized=pd.DataFrame(data_normalized)
data_normalized.columns=['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11',
'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21',
'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28']
In [14]:
data_normalized.head()
Out[14]: data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28
5 rows × 28 columns
In [16]:
##IV值,变量选择
import numpy as np
def CalcIV_Single(Xvar, Yvar):
N_0 = np.sum(Yvar==0)
N_1 = np.sum(Yvar==1)
N_0_group = np.zeros(np.unique(Xvar).shape)
N_1_group = np.zeros(np.unique(Xvar).shape)
iv = 0
for i in range(len(np.unique(Xvar))):
N_0_group[i] = Yvar[(Xvar == np.unique(Xvar)[i]) & (Yvar == 0)].count()
N_1_group[i] = Yvar[(Xvar == np.unique(Xvar)[i]) & (Yvar == 1)].count()
if N_0_group[i] == 0 or N_1_group[i] == 0:
iv = iv + 0
else:
iv =iv + (N_0_group[i]/N_0 - N_1_group[i]/N_1) * np.log((N_0_group[i]/N_0)/(N_1_group[i]/N_1))
return iv
def rangeMark(num,vec):
mark = int()
for i in range(len(vec)-1):
if num >= vec[i] and num <= vec[i+1]:
mark = i
break
return mark
def cut_group(Xvar , n):
bins = []
labels = []
for i in range(n+1):
bins.append(np.percentile(Xvar , (1/ n ) * i * 100))
for i in Xvar:
temp = rangeMark(i,bins)
labels.append(temp)
return labels
def CalcIV_DataFrame_char(df_data,var_lst,Yvar):
iv_res=[]
for varname in var_lst:
iv=CalcIV_Single(df_data[varname],Yvar)
iv_res.append(iv)
df_iv=pd.DataFrame({"varname":var_lst,"iv":iv_res})
df_iv=df_iv.sort_values(by='iv',ascending=False)
cols=list(df_iv)
cols.insert(0,cols.pop(cols.index('varname')))
df_iv=df_iv.loc[:,cols]
return df_iv
def CalcIV_DataFrame_num(df_data , var_lst_num ,Yvar ,n):
for var_name in var_lst_num:
df_data[var_name]=cut_group(df_data[var_name],n)
res=CalcIV_DataFrame_char(df_data,var_lst_num,Yvar)
return res
var_lst_num = ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11',
'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21',
'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28']
data_normalized['Class']=data['Class']
Yvar = data['Class']
n= 10
df_iv_num = CalcIV_DataFrame_num(data_normalized , var_lst_num ,Yvar , n)
print(df_iv_num)
varname iv
3 V4 4.142535
13 V14 3.102916
1 V2 2.973588
2 V3 2.670628
20 V21 2.012535
15 V16 1.963787
26 V27 1.858106
9 V10 1.782953
11 V12 1.735026
0 V1 1.734713
4 V5 1.592449
10 V11 1.514543
8 V9 1.496646
5 V6 1.445069
16 V17 1.317364
7 V8 1.306643
6 V7 1.261211
27 V28 1.184764
17 V18 1.133184
19 V20 0.847962
18 V19 0.699074
23 V24 0.552923
22 V23 0.383712
25 V26 0.191207
24 V25 0.146362
21 V22 0.126493
12 V13 0.123377
14 V15 0.116015
In [17]:
#训练集和测试 ?
from sklearn.model_selection import train_test_split
train_X,test_X,train_y,test_y=train_test_split(data_normalized,data_normalized[['Class']],train_size=0.8,random_state=0)
print('训练集数据大小为',len(train_X))
print('测试集数据大小为',len(test_X))
训练集数据大小为 793
测试集数据大小为 199
In [18]:
#导入模型
from sklearn.linear_model import LogisticRegression
modelLR=LogisticRegression()
In [19]:
modelLR.fit(train_X,train_y)
D:\Programs\Python\anaconda\lib\site-packages\sklearn\linear_model\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
FutureWarning)
D:\Programs\Python\anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
y = column_or_1d(y, warn=True)
Out[19]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
intercept_scaling=1, l1_ratio=None, max_iter=100,
multi_class='warn', n_jobs=None, penalty='l2',
random_state=None, solver='warn', tol=0.0001, verbose=0,
warm_start=False)
In [20]:
modelLR.score(test_X,test_y)
Out[20]:
0.9949748743718593
In [21]:
#模型参数
b=modelLR.coef_
a=modelLR.intercept_
print('常数 ?:')
print(a)
print('系数 ?:')
print(b)
常数 ?:
[-0.02984619]
系数 ?:
[[ 1.33550826e-01 -4.85731951e-02 2.27625736e-01 4.77825082e-01
-8.49290322e-03 -9.47925469e-02 7.92403991e-02 -2.06718071e-01
-1.80396673e-01 -2.99568456e-01 1.96506028e-01 -1.70461141e-01
-6.28087855e-02 -4.87335082e-01 -4.74602886e-02 -5.73627364e-02
9.21413773e-03 2.50257177e-02 9.27967310e-04 -7.30495974e-02
7.27455952e-02 5.46365203e-02 6.37034556e-02 -4.21183450e-02
5.24716355e-02 -1.51390014e-01 9.31590884e-02 -9.19248635e-03
5.52723635e+00]]
模型评估¶
In [22]:
#混淆矩阵
from sklearn.metrics import confusion_matrix
pred_y=modelLR.predict(test_X)
cm=confusion_matrix(test_y.astype(str),pred_y.astype(str))
print('混淆矩阵如下 ?')
print(cm)
混淆矩阵如下 ?
[[106 0]
[ 1 92]]
In [23]:
#混淆矩阵热度 ?
import seaborn as sns
sns.heatmap(cm,annot=True)
Out[23]:
In [24]:
#计算auc
from sklearn.metrics import roc_curve, auc
fpr,tpr,threshold = roc_curve(test_y, pred_y)
roc_auc = auc(fpr,tpr)
print('AUC的值为 ?')
print(roc_auc)
AUC的值为 ?
0.9946236559139785
In [25]:
#ROC曲线
import matplotlib.pyplot as plt
plt.figure()
lw = 2
plt.figure(figsize=(10,10))
plt.plot(fpr, tpr, color='r',
lw=lw, label='ROC curve (area = %0.2f)' % roc_auc) ###假正率为横坐标,真正率为纵坐标做曲线
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()
In [47]:
#结果预测
#读取需要预测的样本
predata=pd.read_csv('predata.csv')
predata.head()
predata.columns
Out[47]:
Index(['Unnamed: 0', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9',
'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19',
'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Class'],
dtype='object')
In [48]:
#预测
predata=predata.iloc[:,1:30]
pred_y=modelLR.predict(predata)
In [49]:
print(pred_y)
[0 1 1 0 0 1 1 1 0 0 0 0 1 0 0 0 0 0 1 1 1 1 0 1 0 0 1 1 1 0 0 0 1 1 0 0 1
0 0 0 0 0 1 1 1 0 0 1 0 0 1 0 1 1 1 1 1 0 1 1 0 1 0 0 1 0 1 1 1 1 1 0 0 0
1 1 1 0 0 0 0 1 0]
In [50]:
from collections import Counter
Counter(pred_y)
Out[50]:
Counter({0: 43, 1: 40})