python 金融发欺诈_python金融反欺诈-项目实战

python风控建模实战lendingClub(博主录制,catboost,lightgbm建模,2K超清分辨率)

## 1. Data Lending Club 2016年Q3数据:https://www.lendingclub.com/info/download-data.action

参考:http://kldavenport.com/lending-club-data-analysis-revisted-with-python/

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt import seaborn as sns %matplotlib inline

df = pd.read_csv("./LoanStats_2016Q3.csv",skiprows=1,low_memory=False)

df.info()

df.head(3)

id

member_id

loan_amnt

funded_amnt

funded_amnt_inv

term

int_rate

installment

grade

sub_grade

sec_app_earliest_cr_line

sec_app_inq_last_6mths

sec_app_mort_acc

sec_app_open_acc

sec_app_revol_util

sec_app_open_il_6m

sec_app_num_rev_accts

sec_app_chargeoff_within_12_mths

sec_app_collections_12_mths_ex_med

sec_app_mths_since_last_major_derog

0

NaN

NaN

15000.0

15000.0

15000.0

36 months

13.99%

512.60

C

C3

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

1

NaN

NaN

2600.0

2600.0

2600.0

36 months

8.99%

82.67

B

B1

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

2

NaN

NaN

32200.0

32200.0

32200.0

60 months

21.49%

880.02

D

D5

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

3 rows × 122 columns

## 2. Keep what we need

# .ix[row slice, column slice]

df.ix[:4,:7]

id

member_id

loan_amnt

funded_amnt

funded_amnt_inv

term

int_rate

0

NaN

NaN

15000.0

15000.0

15000.0

36 months

13.99%

1

NaN

NaN

2600.0

2600.0

2600.0

36 months

8.99%

2

NaN

NaN

32200.0

32200.0

32200.0

60 months

21.49%

3

NaN

NaN

10000.0

10000.0

10000.0

36 months

11.49%

4

NaN

NaN

6000.0

6000.0

6000.0

36 months

13.49%

df.drop('id',1,inplace=True)

df.drop('member_id',1,inplace=True)

df.int_rate = pd.Series(df.int_rate).str.replace('%', '').astype(float)

df.ix[:4,:7]

loan_amnt

funded_amnt

funded_amnt_inv

term

int_rate

installment

grade

0

15000.0

15000.0

15000.0

36 months

13.99

512.60

C

1

2600.0

2600.0

2600.0

36 months

8.99

82.67

B

2

32200.0

32200.0

32200.0

60 months

21.49

880.02

D

3

10000.0

10000.0

10000.0

36 months

11.49

329.72

B

4

6000.0

6000.0

6000.0

36 months

13.49

203.59

C

### Loan Amount Requested Verus the Funded Amount

print (df.loan_amnt != df.funded_amnt).value_counts()

False 99120 True 4 dtype: int64

df.query('loan_amnt != funded_amnt').head(5)

loan_amnt

funded_amnt

funded_amnt_inv

term

int_rate

installment

grade

sub_grade

emp_title

emp_length

sec_app_earliest_cr_line

sec_app_inq_last_6mths

sec_app_mort_acc

sec_app_open_acc

sec_app_revol_util

sec_app_open_il_6m

sec_app_num_rev_accts

sec_app_chargeoff_within_12_mths

sec_app_collections_12_mths_ex_med

sec_app_mths_since_last_major_derog

99120

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

99121

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

99122

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

99123

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

NaN

4 rows × 120 columns

df.dropna(axis=0, how='all',inplace=True)

df.info()

df.dropna(axis=1, how='all',inplace=True)

df.info()

df.ix[:5,8:15]

emp_title

emp_length

home_ownership

annual_inc

verification_status

issue_d

loan_status

0

Fiscal Director

2 years

RENT

55000.0

Not Verified

Sep-16

Current

1

Loaner Coordinator

3 years

RENT

35000.0

Source Verified

Sep-16

Fully Paid

2

warehouse/supervisor

10+ years

MORTGAGE

65000.0

Not Verified

Sep-16

Fully Paid

3

Teacher

10+ years

OWN

55900.0

Not Verified

Sep-16

Current

4

SERVICE MGR

5 years

RENT

33000.0

Not Verified

Sep-16

Current

5

General Manager

10+ years

MORTGAGE

109000.0

Source Verified

Sep-16

Current

### emp_title: employment title

print df.emp_title.value_counts().head()

print df.emp_title.value_counts().tail()

df.emp_title.unique().shape

Teacher 1931 Manager 1701 Owner 990 Supervisor 785 Driver 756 Name: emp_title, dtype: int64 Agent Services Representative 1 Operator Bridge Tunnel 1 Reg Medical Assistant/Referral Spec. 1 Home Health Care 1 rounds cook 1 Name: emp_title, dtype: int64 (37421,)

df.drop(['emp_title'],1, inplace=True)

df.ix[:5,8:15]

emp_length

home_ownership

annual_inc

verification_status

issue_d

loan_status

pymnt_plan

0

2 years

RENT

55000.0

Not Verified

Sep-16

Current

n

1

3 years

RENT

35000.0

Source Verified

Sep-16

Fully Paid

n

2

10+ years

MORTGAGE

65000.0

Not Verified

Sep-16

Fully Paid

n

3

10+ years

OWN

55900.0

Not Verified

Sep-16

Current

n

4

5 years

RENT

33000.0

Not Verified

Sep-16

Current

n

5

10+ years

MORTGAGE

109000.0

Source Verified

Sep-16

Current

n

### emp_length: employment length

df.emp_length.value_counts()

10+ years 34219 2 years 9066 3 years 7925

df.replace('n/a', np.nan,inplace=True)

df.emp_length.fillna(value=0,inplace=True)

df['emp_length'].replace(to_replace='[^0-9]+', value='', inplace=True, regex=True) df['emp_length'] = df['emp_length'].astype(int)

df.emp_length.value_counts()

10 34219 1 14095 2 9066 3 7925 5 6170 4 6022 0 5922 6 4406 8 4168 9 3922 7 3205 Name: emp_length, dtype: int64 ### verification status:”Indicates if income was verified by LC, not verified, or if the income source was verified”

df.verification_status.value_counts()

Source Verified 40781 Verified 31356 Not Verified 26983 Name: verification_status, dtype: int64 ### Target: Loan Statuses

df.info()

df.columns

Index([u’loan_amnt’, u’funded_amnt’, u’funded_amnt_inv’, u’term’, u’int_rate’, u’installment’, u’grade’, u’sub_grade’, u’emp_length’, u’home_ownership’, … u’num_tl_90g_dpd_24m’, u’num_tl_op_past_12m’, u’pct_tl_nvr_dlq’, u’percent_bc_gt_75’, u’pub_rec_bankruptcies’, u’tax_liens’, u’tot_hi_cred_lim’, u’total_bal_ex_mort’, u’total_bc_limit’, u’total_il_high_credit_limit’], dtype=’object’, length=107)

pd.unique(df['loan_status'].values.ravel())

array([‘Current’, ‘Fully Paid’, ‘Late (31-120 days)’, ‘Charged Off’, ‘Late (16-30 days)’, ‘In Grace Period’, ‘Default’], dtype=object)

for col in df.select_dtypes(include=['object']).columns:

print ("Column {} has {} unique instances".format( col, len(df[col].unique())) )

Column term has 2 unique instances Column grade has 7 unique instances Column sub_grade has 35 unique instances Column home_ownership has 4 unique instances Column verification_status has 3 unique instances Column issue_d has 3 unique instances Column loan_status has 7 unique instances Column pymnt_plan has 2 unique instances Column desc has 6 unique instances Column purpose has 13 unique instances Column title has 13 unique instances Column zip_code has 873 unique instances Column addr_state has 50 unique instances Column earliest_cr_line has 614 unique instances Column revol_util has 1087 unique instances Column initial_list_status has 2 unique instances Column last_pymnt_d has 13

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
lending club 贷款数据 2018年第二季度的贷款数据 "id","member_id","loan_amnt","funded_amnt","funded_amnt_inv","term","int_rate","installment","grade","sub_grade","emp_title","emp_length","home_ownership","annual_inc","verification_status","issue_d","loan_status","pymnt_plan","url","desc","purpose","title","zip_code","addr_state","dti","delinq_2yrs","earliest_cr_line","inq_last_6mths","mths_since_last_delinq","mths_since_last_record","open_acc","pub_rec","revol_bal","revol_util","total_acc","initial_list_status","out_prncp","out_prncp_inv","total_pymnt","total_pymnt_inv","total_rec_prncp","total_rec_int","total_rec_late_fee","recoveries","collection_recovery_fee","last_pymnt_d","last_pymnt_amnt","next_pymnt_d","last_credit_pull_d","collections_12_mths_ex_med","mths_since_last_major_derog","policy_code","application_type","annual_inc_joint","dti_joint","verification_status_joint","acc_now_delinq","tot_coll_amt","tot_cur_bal","open_acc_6m","open_act_il","open_il_12m","open_il_24m","mths_since_rcnt_il","total_bal_il","il_util","open_rv_12m","open_rv_24m","max_bal_bc","all_util","total_rev_hi_lim","inq_fi","total_cu_tl","inq_last_12m","acc_open_past_24mths","avg_cur_bal","bc_open_to_buy","bc_util","chargeoff_within_12_mths","delinq_amnt","mo_sin_old_il_acct","mo_sin_old_rev_tl_op","mo_sin_rcnt_rev_tl_op","mo_sin_rcnt_tl","mort_acc","mths_since_recent_bc","mths_since_recent_bc_dlq","mths_since_recent_inq","mths_since_recent_revol_delinq","num_accts_ever_120_pd","num_actv_bc_tl","num_actv_rev_tl","num_bc_sats","num_bc_tl","num_il_tl","num_op_rev_tl","num_rev_accts","num_rev_tl_bal_gt_0","num_sats","num_tl_120dpd_2m","num_tl_30dpd","num_tl_90g_dpd_24m","num_tl_op_past_12m","pct_tl_nvr_dlq","percent_bc_gt_75","pub_rec_bankruptcies","tax_liens","tot_hi_cred_lim","total_bal_ex_mort","total_bc_limit","total_il_high_credit_limit","revol_bal_joint","sec_app_earliest_cr_line","sec_app_inq_last_6mths","sec_app_mort_acc","sec_app_open_acc","sec_app_revol_util","sec_app_open_act_il","sec_app_num_rev

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值