import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib. pyplot as plt
import seaborn as sns
% matplotlib inline
# Use the KaiTi font for sans-serif text.
# NOTE(review): presumably chosen so Chinese labels render — confirm the font is installed.
plt. rcParams[ 'font.sans-serif' ] = [ 'KaiTi' ]
# Disable matplotlib's Unicode minus sign on axes.
# NOTE(review): presumably because the selected font lacks that glyph — confirm.
plt. rcParams[ 'axes.unicode_minus' ] = False
import warnings
# Silence every warning for the rest of the session.
warnings. filterwarnings( "ignore" )
数据导入
# Load the training set, using the 'id' column as the row index.
data = pd. read_csv( 'train.csv' , index_col = 'id' )
# IPython help lookup for pd.read_csv (the trailing `?` is notebook-only syntax).
pd. read_csv?
# Preview the first rows of the frame.
data. head( )
loanAmnt term interestRate installment grade subGrade employmentTitle employmentLength homeOwnership annualIncome ... n5 n6 n7 n8 n9 n10 n11 n12 n13 n14 id 0 35000.0 5 19.52 917.97 E E2 320.0 2 years 2 110000.0 ... 9.0 8.0 4.0 12.0 2.0 7.0 0.0 0.0 0.0 2.0 1 18000.0 5 18.49 461.90 D D2 219843.0 5 years 0 46000.0 ... NaN NaN NaN NaN NaN 13.0 NaN NaN NaN NaN 2 12000.0 5 16.99 298.17 D D3 31698.0 8 years 0 74000.0 ... 0.0 21.0 4.0 5.0 3.0 11.0 0.0 0.0 0.0 4.0 3 11000.0 3 7.26 340.96 A A4 46854.0 10+ years 1 118000.0 ... 16.0 4.0 7.0 21.0 6.0 9.0 0.0 0.0 0.0 1.0 4 3000.0 3 12.99 101.07 C C2 54.0 NaN 1 29000.0 ... 4.0 9.0 10.0 15.0 7.0 12.0 0.0 0.0 0.0 4.0
5 rows × 46 columns
data. describe( )
loanAmnt term interestRate installment employmentTitle homeOwnership annualIncome verificationStatus isDefault purpose ... n5 n6 n7 n8 n9 n10 n11 n12 n13 n14 count 800000.000000 800000.000000 800000.000000 800000.000000 799999.000000 800000.000000 8.000000e+05 800000.000000 800000.000000 800000.000000 ... 759730.000000 759730.000000 759730.000000 759729.000000 759730.000000 766761.000000 730248.000000 759730.000000 759730.000000 759730.000000 mean 14416.818875 3.482745 13.238391 437.947723 72005.351714 0.614213 7.613391e+04 1.009683 0.199513 1.745982 ... 8.107937 8.575994 8.282953 14.622488 5.592345 11.643896 0.000815 0.003384 0.089366 2.178606 std 8716.086178 0.855832 4.765757 261.460393 106585.640204 0.675749 6.894751e+04 0.782716 0.399634 2.367453 ... 4.799210 7.400536 4.561689 8.124610 3.216184 5.484104 0.030075 0.062041 0.509069 1.844377 min 500.000000 3.000000 5.310000 15.690000 0.000000 0.000000 0.000000e+00 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 25% 8000.000000 3.000000 9.750000 248.450000 427.000000 0.000000 4.560000e+04 0.000000 0.000000 0.000000 ... 5.000000 4.000000 5.000000 9.000000 3.000000 8.000000 0.000000 0.000000 0.000000 1.000000 50% 12000.000000 3.000000 12.740000 375.135000 7755.000000 1.000000 6.500000e+04 1.000000 0.000000 0.000000 ... 7.000000 7.000000 7.000000 13.000000 5.000000 11.000000 0.000000 0.000000 0.000000 2.000000 75% 20000.000000 3.000000 15.990000 580.710000 117663.500000 1.000000 9.000000e+04 2.000000 0.000000 4.000000 ... 11.000000 11.000000 10.000000 19.000000 7.000000 14.000000 0.000000 0.000000 0.000000 3.000000 max 40000.000000 5.000000 30.990000 1715.420000 378351.000000 5.000000 1.099920e+07 2.000000 1.000000 13.000000 ... 70.000000 132.000000 79.000000 128.000000 45.000000 82.000000 4.000000 4.000000 39.000000 30.000000
8 rows × 41 columns
data. info( )
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 800000 entries, 0 to 799999
Data columns (total 47 columns):
id 800000 non-null int64
loanAmnt 800000 non-null float64
term 800000 non-null int64
interestRate 800000 non-null float64
installment 800000 non-null float64
grade 800000 non-null object
subGrade 800000 non-null object
employmentTitle 799999 non-null float64
employmentLength 753201 non-null object
homeOwnership 800000 non-null int64
annualIncome 800000 non-null float64
verificationStatus 800000 non-null int64
issueDate 800000 non-null object
isDefault 800000 non-null int64
purpose 800000 non-null int64
postCode 799999 non-null float64
regionCode 800000 non-null int64
dti 799761 non-null float64
delinquency_2years 800000 non-null float64
ficoRangeLow 800000 non-null float64
ficoRangeHigh 800000 non-null float64
openAcc 800000 non-null float64
pubRec 800000 non-null float64
pubRecBankruptcies 799595 non-null float64
revolBal 800000 non-null float64
revolUtil 799469 non-null float64
totalAcc 800000 non-null float64
initialListStatus 800000 non-null int64
applicationType 800000 non-null int64
earliesCreditLine 800000 non-null object
title 799999 non-null float64
policyCode 800000 non-null float64
n0 759730 non-null float64
n1 759730 non-null float64
n2 759730 non-null float64
n2.1 759730 non-null float64
n4 766761 non-null float64
n5 759730 non-null float64
n6 759730 non-null float64
n7 759730 non-null float64
n8 759729 non-null float64
n9 759730 non-null float64
n10 766761 non-null float64
n11 730248 non-null float64
n12 759730 non-null float64
n13 759730 non-null float64
n14 759730 non-null float64
dtypes: float64(33), int64(9), object(5)
memory usage: 286.9+ MB
data. columns
Index(['loanAmnt', 'term', 'interestRate', 'installment', 'grade', 'subGrade',
'employmentTitle', 'employmentLength', 'homeOwnership', 'annualIncome',
'verificationStatus', 'issueDate', 'isDefault', 'purpose', 'postCode',
'regionCode', 'dti', 'delinquency_2years', 'ficoRangeLow',
'ficoRangeHigh', 'openAcc', 'pubRec', 'pubRecBankruptcies', 'revolBal',
'revolUtil', 'totalAcc', 'initialListStatus', 'applicationType',
'earliesCreditLine', 'title', 'policyCode', 'n0', 'n1', 'n2', 'n2.1',
'n4', 'n5', 'n6', 'n7', 'n8', 'n9', 'n10', 'n11', 'n12', 'n13', 'n14'],
dtype='object')
Field Description
id 为贷款清单分配的唯一信用证标识
loanAmnt 贷款金额
term 贷款期限(year)
interestRate 贷款利率
installment 分期付款金额
grade 贷款等级
subGrade 贷款等级之子级
employmentTitle 就业职称
employmentLength 就业年限(年)
isDefault 标签
样本分布不均匀
总体违约率为0.1995
data[ 'isDefault' ] . value_counts( )
0 640390
1 159610
Name: isDefault, dtype: int64
data[ 'isDefault' ] . sum ( ) / data. shape[ 0 ]
0.1995125
‘interestRate’,‘loanAmnt’,‘installment’ 数值型
违约样本的贷款利率、贷款金额和分期付款金额普遍高于未违约样本
sns. boxplot( x= 'isDefault' , y= 'loanAmnt' , data= data)
<matplotlib.axes._subplots.AxesSubplot at 0x2ee44efed68>
![png](output_13_1.png)
plt. subplot?
# Draw the kernel density estimate of each numeric loan feature side by side.
numeric_cols = ['interestRate', 'loanAmnt', 'installment']
plt.figure(figsize=(16, 5))
# Subplot positions are 1-based, so start the enumeration at 1.
for position, col in enumerate(numeric_cols, start=1):
    plt.subplot(1, len(numeric_cols), position)
    sns.kdeplot(data[col])
![png](output_15_0.png)
data[ [ 'isDefault' , 'interestRate' , 'loanAmnt' , 'installment' ] ] . groupby( 'isDefault' ) . agg( [ 'mean' , 'median' ] )
interestRate loanAmnt installment mean median mean median mean median isDefault 0 12.621685 12.18 14133.065749 12000.0 431.222184 368.08 1 15.712749 15.05 15555.298070 14325.0 464.932048 402.83
term 数值型分类
期限为5年的贷款违约比例更高:5年期约32.4%(62484/193098),3年期约16.0%(97126/606902)
pd. crosstab( data[ 'isDefault' ] , data[ 'term' ] , )
term 3 5 isDefault 0 509776 130614 1 97126 62484
‘grade’,‘subGrade’ str_class
等级从A至G违约率逐级升高;同一等级内子级1至5违约率递增(跨等级仅G1略低于F4、F5)
可以使用sub替代grade
# Default rate per grade: the mean of the 0/1 isDefault label within each group.
# Selecting only isDefault keeps the string grouping key out of the arithmetic.
data.groupby('grade')[['isDefault']].mean()
isDefault grade A 0.060375 B 0.132992 C 0.225020 D 0.303852 E 0.384291 F 0.453524 G 0.497017
# Default rate per subGrade: the mean of the 0/1 isDefault label within each group.
# Selecting only isDefault keeps the string grouping key out of the arithmetic.
data.groupby('subGrade')[['isDefault']].mean()
isDefault subGrade A1 0.031919 A2 0.045697 A3 0.055882 A4 0.067221 A5 0.085399 B1 0.102921 B2 0.112262 B3 0.129239 B4 0.148639 B5 0.165649 C1 0.191360 C2 0.206892 C3 0.224576 C4 0.250113 C5 0.261549 D1 0.277982 D2 0.297572 D3 0.304015 D4 0.322863 D5 0.334735 E1 0.355233 E2 0.376903 E3 0.387460 E4 0.402243 E5 0.419161 F1 0.426498 F2 0.455991 F3 0.456807 F4 0.477440 F5 0.482993 G1 0.466174 G2 0.480910 G3 0.519427 G4 0.521971 G5 0.548837
‘employmentTitle’ int_class
分类较多 (248683,)
len ( data[ 'employmentTitle' ] . value_counts( ) )
248683
employmentLength str_class
年限越久违约率总体略低,但并非严格单调(10+ years 最低,约18.7%)
缺失样本违约率明显较高(约26.9%),应作为单独一类处理
# Treat a missing employment length as its own category. -1 is the sentinel
# that map_employmentLength maps to 0. fillna returns a new Series, so the
# result must be assigned back for the fill to take effect.
MISSING_EMPLOYMENT_LENGTH = -1
data['employmentLength'] = data['employmentLength'].fillna(MISSING_EMPLOYMENT_LENGTH)
# Category sizes, now including the missing-value bucket.
data['employmentLength'].value_counts()
# Default rate per employment-length category.
data.groupby('employmentLength')[['isDefault']].mean()
# Ordinal encoding: missing -> 0, '< 1 year' -> 1, '1 year' -> 2,
# '2 years'..'9 years' -> 3..10, '10+ years' -> 11.
map_employmentLength = {f'{i} years': i + 1 for i in range(2, 10)}
map_employmentLength.update(
    {MISSING_EMPLOYMENT_LENGTH: 0, '< 1 year': 1, '1 year': 2, '10+ years': 11}
)
10+ years 262753
2 years 72358
< 1 year 64237
3 years 64152
1 year 52489
5 years 50102
4 years 47985
-1 46799
6 years 37254
8 years 36192
7 years 35407
9 years 30272
Name: employmentLength, dtype: int64
homeOwnership 借款人在登记时提供的房屋所有权状况 annualIncome 年收入 verificationStatus 验证状态 issueDate 贷款发放的月份 purpose 借款人在贷款申请时的贷款用途类别 postCode 借款人在贷款申请中提供的邮政编码的前3位数字 regionCode 地区编码 dti 债务收入比 delinquency_2years 借款人过去2年信用档案中逾期30天以上的违约事件数
homeOwnership int_class
1,5 较高 0,4较低
3,4,5,样本较少
data[ 'homeOwnership' ] . value_counts( )
0 395732
1 317660
2 86309
3 185
5 81
4 33
Name: homeOwnership, dtype: int64
# Default rate per homeOwnership category (mean of the 0/1 isDefault label).
# Selecting only isDefault stops the grouping key from being aggregated and
# echoed back as an extra data column.
data.groupby('homeOwnership')[['isDefault']].mean()
homeOwnership isDefault homeOwnership 0 0.0 0.171535 1 1.0 0.232107 2 2.0 0.207800 3 3.0 0.205405 4 4.0 0.151515 5 5.0 0.234568
annualIncome 数值型
非正态分布
较低的越容易违约
sns. kdeplot( data[ 'annualIncome' ] )
<matplotlib.axes._subplots.AxesSubplot at 0x2ee46e054a8>
![png](output_31_1.png)
data[ [ 'annualIncome' , 'isDefault' ] ] . groupby( 'isDefault' ) . agg( [ 'mean' , 'median' ] )
annualIncome mean median isDefault 0 77606.514023 65000.0 1 70225.505164 60000.0
‘verificationStatus’ int_class
分类 label
data[ 'verificationStatus' ] . value_counts( )
1 309810
2 248968
0 241222
Name: verificationStatus, dtype: int64
# Default rate per verificationStatus category (mean of the 0/1 isDefault label).
# Selecting only isDefault stops the grouping key from being aggregated and
# echoed back as an extra data column.
data.groupby('verificationStatus')[['isDefault']].mean()
verificationStatus isDefault verificationStatus 0 0.0 0.147221 1 1.0 0.209412 2 2.0 0.237858
issueDate 贷款发放月份
年份影响较大
月份影响较小
# Work on an explicit copy so the new columns are never written into a slice of `data`.
data_date = data[['issueDate', 'isDefault']].copy()
# Parse the issue date once and derive both calendar parts from it.
issue_date = pd.to_datetime(data_date['issueDate'])
data_date['year'] = issue_date.dt.year
data_date['month'] = issue_date.dt.month
# Default rate by issue year.
data_date.groupby('year')['isDefault'].mean()
year
2007 0.178344
2008 0.162073
2009 0.124775
2010 0.131468
2011 0.145621
2012 0.159931
2013 0.155974
2014 0.184278
2015 0.202053
2016 0.233084
2017 0.231040
2018 0.157534
Name: isDefault, dtype: float64
data_date. groupby( 'month' ) [ 'isDefault' ] . mean( )
month
1 0.190948
2 0.191409
3 0.202784
4 0.209204
5 0.205296
6 0.206889
7 0.205550
8 0.199575
9 0.202622
10 0.191226
11 0.192314
12 0.198323
Name: isDefault, dtype: float64
purpose 目的 int_class
data. groupby( 'purpose' ) [ 'isDefault' ] . mean( )
purpose
0 0.211370
1 0.295190
2 0.175718
3 0.189431
4 0.169278
5 0.209655
6 0.225769
7 0.196166
8 0.146125
9 0.218337
10 0.229653
11 0.238267
12 0.111519
13 0.189474
Name: isDefault, dtype: float64
‘postCode’,‘regionCode’ 多分类
分类较多
from sklearn.preprocessing import LabelEncoder

# Integer-encode the high-cardinality categorical columns. The encoder is fit
# on the train and test values together so a category present in only one of
# the two frames still gets a label.
# NOTE(review): data_train and data_test_a are not defined in the visible part
# of this notebook — confirm they are created before this cell runs.
for col in ['employmentTitle', 'postCode', 'title', 'subGrade']:
    train_values = data_train[col].astype(str)
    test_values = data_test_a[col].astype(str)
    le = LabelEncoder()
    le.fit(pd.concat([train_values, test_values]))
    data_train[col] = le.transform(train_values)
    data_test_a[col] = le.transform(test_values)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-207-c6562597df2a> in <module>
1 from sklearn.preprocessing import LabelEncoder
----> 2 for col in data(['employmentTitle', 'postCode', 'title','subGrade']):
3 le = LabelEncoder()
4 le.fit(list(data_train[col].astype(str).values) +
5 list(data_test_a[col].astype(str).values))
TypeError: 'DataFrame' object is not callable
# Encode employmentTitle, giving missing values the sentinel -1.
# fillna is a pandas method, so it has to run on the Series before `.values`
# converts it into a NumPy array.
le = LabelEncoder()
employment_title = data['employmentTitle'].fillna(-1).values
le.fit(employment_title)
le.transform(employment_title)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-212-a0ac796b2725> in <module>
1 le = LabelEncoder()
2 data['employmentTitle']
----> 3 le.fit(data['employmentTitle'].values.fillna(-1))
4 le.transform(data['employmentTitle'].values.fillna(-1))
AttributeError: 'numpy.ndarray' object has no attribute 'fillna'
le. fit( data[ 'employmentTitle' ] . fillna( 0 ) . values)
LabelEncoder()
# Encode employmentTitle (missing values filled with 0) and show the largest label.
filled_titles = data['employmentTitle'].fillna(0)
w = le.fit_transform(filled_titles.values)
np.max(np.array(w))
248682
from sklearn import preprocessing
le = preprocessing. LabelEncoder( )
le. fit( [ 1 , 2 , 2 , 6 ] )
LabelEncoder()
LabelEncoder( )
le. classes_
array([1, 2, 6])
data[ [ 'postCode' , 'regionCode' ] ] . head( )
postCode regionCode id 0 137.0 32 1 156.0 18 2 337.0 14 3 148.0 11 4 301.0 21
dti 债务收入比 数值型
data[ 'dti' ] . describe( )
count 799761.000000
mean 18.284557
std 11.150155
min -1.000000
25% 11.790000
50% 17.610000
75% 24.060000
max 999.000000
Name: dti, dtype: float64
sns. kdeplot( data[ 'dti' ] )
<matplotlib.axes._subplots.AxesSubplot at 0x2ee02cd4b70>
![png](output_54_1.png)
# Box plot of the dti (debt-to-income ratio) column.
# The empty subscript after data['dti'] was a syntax error and is removed.
sns.boxplot(x=data['dti'])
<matplotlib.axes._subplots.AxesSubplot at 0x2ee02d0e278>
![png](output_55_1.png)
data[ [ 'isDefault' , 'dti' , 'employmentLength' ] ] . groupby( 'employmentLength' ) . agg( [ 'mean' , 'max' , 'median' ] )
isDefault dti mean max median mean max median employmentLength -1 0.268788 1 0 20.825034 999.00 19.310 1 year 0.205186 1 0 17.743159 999.00 17.110 10+ years 0.187039 1 0 18.426270 592.12 17.910 2 years 0.200033 1 0 17.760318 580.20 17.140 3 years 0.201490 1 0 17.830329 999.00 17.170 4 years 0.198291 1 0 17.964413 999.00 17.420 5 years 0.195741 1 0 17.913925 466.92 17.350 6 years 0.192812 1 0 18.034503 308.25 17.550 7 years 0.195046 1 0 18.132814 345.42 17.500 8 years 0.197226 1 0 18.256569 77.23 17.710 9 years 0.198401 1 0 18.432058 489.16 17.945 < 1 year 0.204882 1 0 18.052667 999.00 17.000
delinquency_2years 数值型
data[ 'delinquency_2years' ] . value_counts( )
0.0 645715
1.0 102586
2.0 29944
3.0 10919
4.0 4808
5.0 2504
6.0 1399
7.0 770
8.0 443
9.0 293
10.0 192
11.0 144
12.0 86
14.0 53
13.0 50
15.0 23
16.0 20
18.0 13
19.0 10
20.0 7
17.0 6
21.0 3
22.0 3
26.0 2
29.0 2
30.0 1
25.0 1
39.0 1
27.0 1
24.0 1
Name: delinquency_2years, dtype: int64
data[ [ 'isDefault' , 'delinquency_2years' ] ] . groupby( 'delinquency_2years' ) . mean( )
isDefault delinquency_2years 0.0 0.195829 1.0 0.208401 2.0 0.223484 3.0 0.229600 4.0 0.235441 5.0 0.243211 6.0 0.235883 7.0 0.254545 8.0 0.207675 9.0 0.252560 10.0 0.213542 11.0 0.215278 12.0 0.267442 13.0 0.300000 14.0 0.245283 15.0 0.304348 16.0 0.300000 17.0 0.500000 18.0 0.076923 19.0 0.300000 20.0 0.428571 21.0 0.333333 22.0 0.000000 24.0 0.000000 25.0 1.000000 26.0 0.000000 27.0 1.000000 29.0 0.000000 30.0 0.000000 39.0 0.000000
ficoRangeLow 借款人在贷款发放时的fico所属的下限范围
ficoRangeHigh 借款人在贷款发放时的fico所属的上限范围
openAcc 借款人信用档案中未结信用额度的数量
pubRec 贬损公共记录的数量
pubRecBankruptcies 公开记录清除的数量
revolBal 信贷周转余额合计
revolUtil 循环额度利用率,或借款人使用的相对于所有可用循环信贷的信贷金额
totalAcc 借款人信用档案中当前的信用额度总数
initialListStatus 贷款的初始列表状态
applicationType 表明贷款是个人申请还是与两个共同借款人的联合申请
earliesCreditLine 借款人最早报告的信用额度开立的月份
title 借款人提供的贷款名称
policyCode 公开可用的策略_代码= 1 新产品不公开可用的策略_代码= 2
n系列匿名特征 匿名特征n0- n14,为一些贷款人行为计数特征的处理
‘ficoRangeLow’,‘ficoRangeHigh’,‘openAcc’,‘pubRec’,‘pubRecBankruptcies’,‘revolBal’,‘revolUtil’,‘totalAcc’
data[ [ 'ficoRangeLow' , 'ficoRangeHigh' , 'openAcc' , 'pubRec' , 'pubRecBankruptcies' , 'revolBal' , 'revolUtil' , 'totalAcc' ] ] . head( )
ficoRangeLow ficoRangeHigh openAcc pubRec pubRecBankruptcies revolBal revolUtil totalAcc id 0 730.0 734.0 7.0 0.0 0.0 24178.0 48.9 27.0 1 700.0 704.0 13.0 0.0 0.0 15096.0 38.9 18.0 2 675.0 679.0 11.0 0.0 0.0 4606.0 51.8 27.0 3 685.0 689.0 9.0 0.0 0.0 9948.0 52.6 28.0 4 690.0 694.0 12.0 0.0 0.0 2942.0 32.0 27.0
data[ [ 'initialListStatus' , 'applicationType' ] ] . head( )
initialListStatus applicationType id 0 0 0 1 1 0 2 0 0 3 1 0 4 0 0
earliesCreditLine 时间变量
len ( pd. to_datetime( data[ 'earliesCreditLine' ] , format = '%b-%Y' ) )
800000
title 多分类变量
len ( data[ 'title' ] . value_counts( ) )
39644
policyCode 无用特征
取值唯一(全部为1.0),不含任何信息,可直接删除
data[ 'policyCode' ] . value_counts( )
1.0 800000
Name: policyCode, dtype: int64
匿名 行为计数
# Fraction of missing values in each of the last 15 columns (the n* features).
data.iloc[:, -15:].isna().mean()
n0 0.050338
n1 0.050338
n2 0.050338
n2.1 0.050338
n4 0.041549
n5 0.050338
n6 0.050338
n7 0.050338
n8 0.050339
n9 0.050338
n10 0.041549
n11 0.087190
n12 0.050338
n13 0.050338
n14 0.050338
dtype: float64
# Number of distinct values (NaN counted as one value) in each of the last 15 columns.
data.iloc[:, -15:].nunique(dropna=False).tolist()
[40, 34, 51, 51, 47, 66, 108, 71, 103, 45, 77, 6, 6, 29, 32]
data[ data. columns[ - 15 : ] ] . describe( )
n0 n1 n2 n2.1 n4 n5 n6 n7 n8 n9 n10 n11 n12 n13 n14 count 759730.000000 759730.000000 759730.000000 759730.000000 766761.000000 759730.000000 759730.000000 759730.000000 759729.000000 759730.000000 766761.000000 730248.000000 759730.000000 759730.000000 759730.000000 mean 0.511932 3.642330 5.642648 5.642648 4.735641 8.107937 8.575994 8.282953 14.622488 5.592345 11.643896 0.000815 0.003384 0.089366 2.178606 std 1.333266 2.246825 3.302810 3.302810 2.949969 4.799210 7.400536 4.561689 8.124610 3.216184 5.484104 0.030075 0.062041 0.509069 1.844377 min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 25% 0.000000 2.000000 3.000000 3.000000 3.000000 5.000000 4.000000 5.000000 9.000000 3.000000 8.000000 0.000000 0.000000 0.000000 1.000000 50% 0.000000 3.000000 5.000000 5.000000 4.000000 7.000000 7.000000 7.000000 13.000000 5.000000 11.000000 0.000000 0.000000 0.000000 2.000000 75% 0.000000 5.000000 7.000000 7.000000 6.000000 11.000000 11.000000 10.000000 19.000000 7.000000 14.000000 0.000000 0.000000 0.000000 3.000000 max 51.000000 33.000000 63.000000 63.000000 49.000000 70.000000 132.000000 79.000000 128.000000 45.000000 82.000000 4.000000 4.000000 39.000000 30.000000
sns. heatmap( data[ [ 'isDefault' ] ] . join( data[ data. columns[ - 15 : ] ] ) . corr( ) . abs ( ) , vmin= 0 , vmax= 1 )
<matplotlib.axes._subplots.AxesSubplot at 0x2ee755187b8>
![png](output_74_1.png)
data[ [ 'isDefault' ] ] . join( data[ data. columns[ - 15 : ] ] ) . corr( )
isDefault n0 n1 n2 n2.1 n4 n5 n6 n7 n8 n9 n10 n11 n12 n13 n14 isDefault 1.000000 0.010953 0.040659 0.070352 0.070352 0.013156 -0.018661 0.005350 0.032007 -0.005951 0.068978 0.025583 -0.000293 0.003287 0.010101 0.085715 n0 0.010953 1.000000 -0.056378 -0.029272 -0.029272 -0.069880 0.067624 0.126076 -0.019447 0.082053 -0.023248 0.023395 0.035126 -0.003520 0.313191 0.080475 n1 0.040659 -0.056378 1.000000 0.807789 0.807789 0.829016 0.577299 -0.032224 0.651852 0.443406 0.800925 0.540271 -0.004406 -0.004329 -0.033712 0.160022 n2 0.070352 -0.029272 0.807789 1.000000 1.000000 0.663186 0.473744 -0.001495 0.790337 0.567608 0.982015 0.655296 -0.002554 0.002908 -0.018058 0.256501 n2.1 0.070352 -0.029272 0.807789 1.000000 1.000000 0.663186 0.473744 -0.001495 0.790337 0.567608 0.982015 0.655296 -0.002554 0.002908 -0.018058 0.256501 n4 0.013156 -0.069880 0.829016 0.663186 0.663186 1.000000 0.717936 -0.019485 0.742157 0.573103 0.639867 0.614658 -0.000965 -0.003339 -0.020490 0.236147 n5 -0.018661 0.067624 0.577299 0.473744 0.473744 0.717936 1.000000 0.032646 0.618970 0.838066 0.472434 0.506232 0.007712 0.010103 0.016239 0.217438 n6 0.005350 0.126076 -0.032224 -0.001495 -0.001495 -0.019485 0.032646 1.000000 0.025892 0.072633 0.001943 0.389924 0.000764 0.006952 0.070177 0.189194 n7 0.032007 -0.019447 0.651852 0.790337 0.790337 0.742157 0.618970 0.025892 1.000000 0.774955 0.794465 0.829799 0.003134 0.011109 -0.024751 0.345906 n8 -0.005951 0.082053 0.443406 0.567608 0.567608 0.573103 0.838066 0.072633 0.774955 1.000000 0.563910 0.640729 0.010462 0.018247 0.029846 0.294802 n9 0.068978 -0.023248 0.800925 0.982015 0.982015 0.639867 0.472434 0.001943 0.794465 0.563910 1.000000 0.660395 -0.002568 0.002943 -0.025324 0.243350 n10 0.025583 0.023395 0.540271 0.655296 0.655296 0.614658 0.506232 0.389924 0.829799 0.640729 0.660395 1.000000 0.001392 0.006919 0.010042 0.364384 n11 -0.000293 0.035126 -0.004406 -0.002554 -0.002554 -0.000965 0.007712 0.000764 0.003134 0.010462 -0.002568 0.001392 
1.000000 0.003629 0.072216 -0.005094 n12 0.003287 -0.003520 -0.004329 0.002908 0.002908 -0.003339 0.010103 0.006952 0.011109 0.018247 0.002943 0.006919 0.003629 1.000000 0.004886 -0.006824 n13 0.010101 0.313191 -0.033712 -0.018058 -0.018058 -0.020490 0.016239 0.070177 -0.024751 0.029846 -0.025324 0.010042 0.072216 0.004886 1.000000 -0.000595 n14 0.085715 0.080475 0.160022 0.256501 0.256501 0.236147 0.217438 0.189194 0.345906 0.294802 0.243350 0.364384 -0.005094 -0.006824 -0.000595 1.000000
sns. heatmap?