python信用卡违约预测分析_运用python进行信用卡 客户违约预测

数据预处理¶

In [1]:

#路径设置

import os

os.chdir("E:\PYTHON数据分析20200324")

In [2]:

#数据读取

import pandas as pd

data=pd.read_csv('creditcard0.csv')

data=data.drop(['Unnamed: 0'],axis=1)

data.head()

Out[2]: data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

V1V2V3V4V5V6V7V8V9V10...V20V21V22V23V24V25V26V27V28Class

5 rows × 29 columns

In [3]:

#缺失值处 ?

data.isnull().any()

data.isnull().sum()

print(data.isnull().any())

print(data.isnull().sum())

V1 False

V2 True

V3 False

V4 False

V5 False

V6 False

V7 False

V8 False

V9 False

V10 False

V11 True

V12 False

V13 False

V14 False

V15 False

V16 False

V17 False

V18 False

V19 False

V20 False

V21 True

V22 False

V23 False

V24 False

V25 False

V26 False

V27 False

V28 False

Class False

dtype: bool

V1 0

V2 12

V3 0

V4 0

V5 0

V6 0

V7 0

V8 0

V9 0

V10 0

V11 14

V12 0

V13 0

V14 0

V15 0

V16 0

V17 0

V18 0

V19 0

V20 0

V21 10

V22 0

V23 0

V24 0

V25 0

V26 0

V27 0

V28 0

Class 0

dtype: int64

In [4]:

#缺失值填 ?

data['V2']=data['V2'].fillna(data['V2'].mean())

data['V11']=data['V11'].fillna(data['V11'].mean())

data['V21']=data['V21'].fillna(data['V21'].mean())

#判断缺失值是否为0

print('数据现有缺失值个数为 ?')

print(data.isnull().sum().sum())

数据现有缺失值个数为 ?

0

In [5]:

#描述性统计分 ?

import matplotlib.pyplot as plt

print('data.describe:')

print(data.describe())

data.describe:

V1 V2 V3 V4 V5 V6 \

count 992.000000 992.000000 992.000000 992.000000 992.000000 992.000000

mean -2.362398 1.740766 -3.513749 2.227269 -1.586542 -0.655236

std 5.595568 3.805145 6.205772 3.231054 4.185663 1.776190

min -35.698345 -33.049877 -31.103685 -4.125352 -22.105532 -6.406267

25% -2.740788 -0.132073 -5.120349 -0.111571 -1.827729 -1.551056

50% -0.753172 0.958155 -1.369045 1.287041 -0.422765 -0.646884

75% 1.060972 2.716910 0.326804 4.250632 0.459740 0.097086

max 2.321264 22.057729 3.745841 12.114672 11.095089 6.474115

V7 V8 V9 V10 ... V20 \

count 992.000000 992.000000 992.000000 992.000000 ... 992.000000

mean -2.725728 0.268681 -1.269198 -2.840494 ... 0.193873

std 5.865285 4.861556 2.332071 4.506409 ... 1.305163

min -43.557242 -41.044261 -13.434066 -24.588262 ... -20.564506

25% -3.058707 -0.203540 -2.298358 -4.551515 ... -0.184205

50% -0.607251 0.139053 -0.670768 -0.882089 ... 0.034889

75% 0.256376 0.877002 0.178203 -0.034903 ... 0.406807

max 10.949696 20.007208 7.721003 8.440031 ... 13.657647

V21 V22 V23 V24 V25 V26 \

count 992.000000 992.000000 992.000000 992.000000 992.000000 992.000000

mean 0.330166 -0.007529 -0.036201 -0.040642 -0.004678 0.016186

std 2.796086 1.176176 1.245022 0.577243 0.673627 0.472193

min -22.797604 -8.887017 -19.254328 -2.028024 -4.781606 -1.152671

25% -0.180641 -0.541855 -0.232125 -0.395566 -0.354275 -0.279250

50% 0.139319 -0.024087 -0.020687 0.003091 0.030655 -0.020398

75% 0.653223 0.594705 0.192766 0.379027 0.382238 0.322613

max 27.202839 8.361985 7.452410 3.278158 2.208209 2.745261

V27 V28 Class

count 992.000000 992.000000 992.000000

mean 0.076917 0.051671 0.495968

std 1.013575 0.641466 0.500236

min -7.263482 -1.869290 0.000000

25% -0.064133 -0.052789 0.000000

50% 0.045379 0.030013 0.000000

75% 0.436937 0.218773 1.000000

max 3.052358 15.415925 1.000000

[8 rows x 29 columns]

In [6]:

print('data.std()')

print(data.std())

data.std()

V1 5.595568

V2 3.805145

V3 6.205772

V4 3.231054

V5 4.185663

V6 1.776190

V7 5.865285

V8 4.861556

V9 2.332071

V10 4.506409

V11 2.745349

V12 4.561692

V13 1.064155

V14 4.670392

V15 0.986559

V16 3.484126

V17 5.973109

V18 2.403695

V19 1.276793

V20 1.305163

V21 2.796086

V22 1.176176

V23 1.245022

V24 0.577243

V25 0.673627

V26 0.472193

V27 1.013575

V28 0.641466

Class 0.500236

dtype: float64

In [7]:

count_classes=pd.value_counts(data["Class"],sort=True).sort_index()

count_classes.plot(kind="bar")

plt.title("frequency of class")

plt.xlabel("class")

plt.ylabel("frequency")

Out[7]:

Text(0, 0.5, 'frequency')

In [8]:

data.hist()

plt.show()

模型建立¶

In [9]:

data.groupby(data.Class).mean()

Out[9]: data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

e data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

Class

2 rows × 28 columns

In [11]:

#数据标准 ?

from sklearn import preprocessing

datax=data.iloc[:,0:28]

data_scale = preprocessing.scale(datax,axis=0, with_mean=True, with_std=True, copy=True)

data_scale=pd.DataFrame(data_scale)

In [12]:

data_scale.head()

Out[12]: data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">0123456789...18192021222324252627

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

0123456789...18192021222324252627

5 rows × 28 columns

In [13]:

#正则 ?

data_normalized = preprocessing.normalize(data_scale, norm='l2')

data_normalized=pd.DataFrame(data_normalized)

data_normalized.columns=['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11',

'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21',

'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28']

In [14]:

data_normalized.head()

Out[14]: data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

ble data-draft-node="block" data-draft-type="table" data-size="normal" data-row-style="normal">

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

V1V2V3V4V5V6V7V8V9V10...V19V20V21V22V23V24V25V26V27V28

5 rows × 28 columns

In [16]:

##IV值,变量选择

import numpy as np

def CalcIV_Single(Xvar, Yvar):

N_0 = np.sum(Yvar==0)

N_1 = np.sum(Yvar==1)

N_0_group = np.zeros(np.unique(Xvar).shape)

N_1_group = np.zeros(np.unique(Xvar).shape)

iv = 0

for i in range(len(np.unique(Xvar))):

N_0_group[i] = Yvar[(Xvar == np.unique(Xvar)[i]) & (Yvar == 0)].count()

N_1_group[i] = Yvar[(Xvar == np.unique(Xvar)[i]) & (Yvar == 1)].count()

if N_0_group[i] == 0 or N_1_group[i] == 0:

iv = iv + 0

else:

iv =iv + (N_0_group[i]/N_0 - N_1_group[i]/N_1) * np.log((N_0_group[i]/N_0)/(N_1_group[i]/N_1))

return iv

def rangeMark(num,vec):

mark = int()

for i in range(len(vec)-1):

if num >= vec[i] and num <= vec[i+1]:

mark = i

break

return mark

def cut_group(Xvar , n):

bins = []

labels = []

for i in range(n+1):

bins.append(np.percentile(Xvar , (1/ n ) * i * 100))

for i in Xvar:

temp = rangeMark(i,bins)

labels.append(temp)

return labels

def CalcIV_DataFrame_char(df_data,var_lst,Yvar):

iv_res=[]

for varname in var_lst:

iv=CalcIV_Single(df_data[varname],Yvar)

iv_res.append(iv)

df_iv=pd.DataFrame({"varname":var_lst,"iv":iv_res})

df_iv=df_iv.sort_values(by='iv',ascending=False)

cols=list(df_iv)

cols.insert(0,cols.pop(cols.index('varname')))

df_iv=df_iv.loc[:,cols]

return df_iv

def CalcIV_DataFrame_num(df_data , var_lst_num ,Yvar ,n):

for var_name in var_lst_num:

df_data[var_name]=cut_group(df_data[var_name],n)

res=CalcIV_DataFrame_char(df_data,var_lst_num,Yvar)

return res

var_lst_num = ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11',

'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21',

'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28']

data_normalized['Class']=data['Class']

Yvar = data['Class']

n= 10

df_iv_num = CalcIV_DataFrame_num(data_normalized , var_lst_num ,Yvar , n)

print(df_iv_num)

varname iv

3 V4 4.142535

13 V14 3.102916

1 V2 2.973588

2 V3 2.670628

20 V21 2.012535

15 V16 1.963787

26 V27 1.858106

9 V10 1.782953

11 V12 1.735026

0 V1 1.734713

4 V5 1.592449

10 V11 1.514543

8 V9 1.496646

5 V6 1.445069

16 V17 1.317364

7 V8 1.306643

6 V7 1.261211

27 V28 1.184764

17 V18 1.133184

19 V20 0.847962

18 V19 0.699074

23 V24 0.552923

22 V23 0.383712

25 V26 0.191207

24 V25 0.146362

21 V22 0.126493

12 V13 0.123377

14 V15 0.116015

In [17]:

#训练集和测试 ?

from sklearn.model_selection import train_test_split

train_X,test_X,train_y,test_y=train_test_split(data_normalized,data_normalized[['Class']],train_size=0.8,random_state=0)

print('训练集数据大小为',len(train_X))

print('测试集数据大小为',len(test_X))

训练集数据大小为 793

测试集数据大小为 199

In [18]:

#导入模型

from sklearn.linear_model import LogisticRegression

modelLR=LogisticRegression()

In [19]:

modelLR.fit(train_X,train_y)

D:\Programs\Python\anaconda\lib\site-packages\sklearn\linear_model\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.

FutureWarning)

D:\Programs\Python\anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

y = column_or_1d(y, warn=True)

Out[19]:

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,

intercept_scaling=1, l1_ratio=None, max_iter=100,

multi_class='warn', n_jobs=None, penalty='l2',

random_state=None, solver='warn', tol=0.0001, verbose=0,

warm_start=False)

In [20]:

modelLR.score(test_X,test_y)

Out[20]:

0.9949748743718593

In [21]:

#模型参数

b=modelLR.coef_

a=modelLR.intercept_

print('常数 ?:')

print(a)

print('系数 ?:')

print(b)

常数 ?:

[-0.02984619]

系数 ?:

[[ 1.33550826e-01 -4.85731951e-02 2.27625736e-01 4.77825082e-01

-8.49290322e-03 -9.47925469e-02 7.92403991e-02 -2.06718071e-01

-1.80396673e-01 -2.99568456e-01 1.96506028e-01 -1.70461141e-01

-6.28087855e-02 -4.87335082e-01 -4.74602886e-02 -5.73627364e-02

9.21413773e-03 2.50257177e-02 9.27967310e-04 -7.30495974e-02

7.27455952e-02 5.46365203e-02 6.37034556e-02 -4.21183450e-02

5.24716355e-02 -1.51390014e-01 9.31590884e-02 -9.19248635e-03

5.52723635e+00]]

模型评估¶

In [22]:

#混淆矩阵

from sklearn.metrics import confusion_matrix

pred_y=modelLR.predict(test_X)

cm=confusion_matrix(test_y.astype(str),pred_y.astype(str))

print('混淆矩阵如下 ?')

print(cm)

混淆矩阵如下 ?

[[106 0]

[ 1 92]]

In [23]:

#混淆矩阵热度 ?

import seaborn as sns

sns.heatmap(cm,annot=True)

Out[23]:

In [24]:

#计算auc

from sklearn.metrics import roc_curve, auc

fpr,tpr,threshold = roc_curve(test_y, pred_y)

roc_auc = auc(fpr,tpr)

print('AUC的值为 ?')

print(roc_auc)

AUC的值为 ?

0.9946236559139785

In [25]:

#ROC曲线

import matplotlib.pyplot as plt

plt.figure()

lw = 2

plt.figure(figsize=(10,10))

plt.plot(fpr, tpr, color='r',

lw=lw, label='ROC curve (area = %0.2f)' % roc_auc) ###假正率为横坐标,真正率为纵坐标做曲线

plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')

plt.xlim([0.0, 1.0])

plt.ylim([0.0, 1.0])

plt.xlabel('False Positive Rate')

plt.ylabel('True Positive Rate')

plt.title('Receiver operating characteristic example')

plt.legend(loc="lower right")

plt.show()

In [47]:

#结果预测

#读取需要预测的样本

predata=pd.read_csv('predata.csv')

predata.head()

predata.columns

Out[47]:

Index(['Unnamed: 0', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9',

'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19',

'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Class'],

dtype='object')

In [48]:

#预测

predata=predata.iloc[:,1:30]

pred_y=modelLR.predict(predata)

In [49]:

print(pred_y)

[0 1 1 0 0 1 1 1 0 0 0 0 1 0 0 0 0 0 1 1 1 1 0 1 0 0 1 1 1 0 0 0 1 1 0 0 1

0 0 0 0 0 1 1 1 0 0 1 0 0 1 0 1 1 1 1 1 0 1 1 0 1 0 0 1 0 1 1 1 1 1 0 0 0

1 1 1 0 0 0 0 1 0]

In [50]:

from collections import Counter

Counter(pred_y)

Out[50]:

Counter({0: 43, 1: 40})

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值