Untitled (1)

import pandas as pd
import numpy as np
# Load the training data from a CSV file at a hard-coded local Windows path.
t = pd.read_csv('C:/Users/user/Desktop/train.csv')
# Re-index by the Loan_ID column; `t` keeps the frame as originally read.
T = t.set_index('Loan_ID')
# Preview the first 10 rows.
T.head(10)
GenderMarriedDependentsEducationSelf_EmployedApplicantIncomeCoapplicantIncomeLoanAmountLoan_Amount_TermCredit_HistoryProperty_AreaLoan_Status
Loan_ID
LP001002MaleNo0GraduateNo58490.0NaN360.01.0UrbanY
LP001003MaleYes1GraduateNo45831508.0128.0360.01.0RuralN
LP001005MaleYes0GraduateYes30000.066.0360.01.0UrbanY
LP001006MaleYes0Not GraduateNo25832358.0120.0360.01.0UrbanY
LP001008MaleNo0GraduateNo60000.0141.0360.01.0UrbanY
LP001011MaleYes2GraduateYes54174196.0267.0360.01.0UrbanY
LP001013MaleYes0Not GraduateNo23331516.095.0360.01.0UrbanY
LP001014MaleYes3+GraduateNo30362504.0158.0360.00.0SemiurbanN
LP001018MaleYes2GraduateNo40061526.0168.0360.01.0UrbanY
LP001020MaleYes1GraduateNo1284110968.0349.0360.01.0SemiurbanN
# Boolean filter: rows where Education is 'Not Graduate', Loan_Status is 'Y'
# and Gender is 'Female'; only the three columns used in the filter are shown.
mask = (
    T['Education'].eq('Not Graduate')
    & T['Loan_Status'].eq('Y')
    & T['Gender'].eq('Female')
)
T.loc[mask, ['Gender', 'Education', 'Loan_Status']]
GenderEducationLoan_Status
Loan_ID
LP001155FemaleNot GraduateY
LP001669FemaleNot GraduateY
LP001692FemaleNot GraduateY
LP001908FemaleNot GraduateY
LP002300FemaleNot GraduateY
LP002314FemaleNot GraduateY
LP002407FemaleNot GraduateY
LP002489FemaleNot GraduateY
LP002502FemaleNot GraduateY
LP002534FemaleNot GraduateY
LP002582FemaleNot GraduateY
LP002731FemaleNot GraduateY
LP002757FemaleNot GraduateY
LP002917FemaleNot GraduateY
def num_missing(x):
    """Return the number of missing (null) entries in *x*.

    Args:
        x: A pandas Series (for example one row or one column handed over
            by ``DataFrame.apply``).

    Returns:
        The count of null values as a plain ``int``.
    """
    # Use the vectorised Series sum rather than the built-in ``sum``, which
    # would walk the boolean mask element by element in Python.
    return int(x.isnull().sum())
# Count missing values per column: axis=0 passes each column to num_missing.
T.apply(num_missing,axis=0)
Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64
# Count missing values per row (axis=1) and show only the first 10 rows.
T.apply(num_missing,axis=1)[:10]
Loan_ID
LP001002    1
LP001003    0
LP001005    0
LP001006    0
LP001008    0
LP001011    0
LP001013    0
LP001014    0
LP001018    0
LP001020    0
dtype: int64
# Impute missing values in these columns with each column's most frequent
# value (mode). The result is assigned back to T instead of calling
# fillna(inplace=True) on T[col]: the in-place form operates on an
# intermediate object (chained assignment), which newer pandas warns about
# and which leaves T unchanged when copy-on-write is enabled.
T['Gender'] = T['Gender'].fillna(T['Gender'].mode().iloc[0])
T['Married'] = T['Married'].fillna(T['Married'].mode().iloc[0])
T['Self_Employed'] = T['Self_Employed'].fillna(T['Self_Employed'].mode().iloc[0])
# Cross-tabulate Credit_History (rows) against Loan_Status (columns);
# margins=True adds the "All" row and column of totals.
pd.crosstab(T['Credit_History'],T['Loan_Status'],margins=True)
Loan_Status       N    Y  All
Credit_History
0.0              82    7   89
1.0              97  378  475
All             179  385  564
# Lookup table with one 'rates' value per property-area type, indexed by
# the area name.
prop_rates = pd.DataFrame(
    {'rates': [1000, 5000, 12000]},
    index=['Rural', 'Semiurban', 'Urban'],
)
prop_rates
           rates
Rural       1000
Semiurban   5000
Urban      12000
# Attach a 'rates' column to every loan by matching T's Property_Area
# values against the index of prop_rates.
T1 = pd.merge(
    T,
    prop_rates,
    how='inner',
    left_on='Property_Area',
    right_index=True,
    sort=False,
)
T1
GenderMarriedDependentsEducationSelf_EmployedApplicantIncomeCoapplicantIncomeLoanAmountLoan_Amount_TermCredit_HistoryProperty_AreaLoan_Statusrates
Loan_ID
LP001002MaleNo0GraduateNo58490.0NaN360.01.0UrbanY12000
LP001005MaleYes0GraduateYes30000.066.0360.01.0UrbanY12000
LP001006MaleYes0Not GraduateNo25832358.0120.0360.01.0UrbanY12000
LP001008MaleNo0GraduateNo60000.0141.0360.01.0UrbanY12000
LP001011MaleYes2GraduateYes54174196.0267.0360.01.0UrbanY12000
LP001013MaleYes0Not GraduateNo23331516.095.0360.01.0UrbanY12000
LP001018MaleYes2GraduateNo40061526.0168.0360.01.0UrbanY12000
LP001024MaleYes2GraduateNo3200700.070.0360.01.0UrbanY12000
LP001027MaleYes2GraduateNo25001840.0109.0360.01.0UrbanY12000
LP001028MaleYes2GraduateNo30738106.0200.0360.01.0UrbanY12000
LP001030MaleYes2GraduateNo12991086.017.0120.01.0UrbanY12000
LP001032MaleNo0GraduateNo49500.0125.0360.01.0UrbanY12000
LP001034MaleNo1Not GraduateNo35960.0100.0240.0NaNUrbanY12000
LP001036FemaleNo0GraduateNo35100.076.0360.00.0UrbanN12000
LP001041MaleYes0GraduateNo26003500.0115.0NaN1.0UrbanY12000
LP001043MaleYes0Not GraduateNo76600.0104.0360.00.0UrbanN12000
LP001046MaleYes1GraduateNo59555625.0315.0360.01.0UrbanY12000
LP001073MaleYes2Not GraduateNo42261040.0110.0360.01.0UrbanY12000
LP001086MaleNo0Not GraduateNo14420.035.0360.01.0UrbanN12000
LP001091MaleYes1GraduateNo41663369.0201.0360.0NaNUrbanN12000
LP001095MaleNo0GraduateNo31670.074.0360.01.0UrbanN12000
LP001106MaleYes0GraduateNo22752067.0NaN360.01.0UrbanY12000
LP001109MaleYes0GraduateNo18281330.0100.0NaN0.0UrbanN12000
LP001114MaleNo0GraduateNo41667210.0184.0360.01.0UrbanY12000
LP001119MaleNo0GraduateNo36000.080.0360.01.0UrbanN12000
LP001120MaleNo0GraduateNo18001213.047.0360.01.0UrbanY12000
LP001123MaleYes0GraduateNo24000.075.0360.0NaNUrbanY12000
LP001136MaleYes0Not GraduateYes46950.096.0NaN1.0UrbanY12000
LP001137FemaleNo0GraduateNo34100.088.0NaN1.0UrbanY12000
LP001138MaleYes1GraduateNo56490.044.0360.01.0UrbanY12000
..........................................
LP002729MaleNo1GraduateNo112500.0196.0360.0NaNSemiurbanN5000
LP002738MaleNo2GraduateNo36170.0107.0360.01.0SemiurbanY5000
LP002741FemaleYes1GraduateNo46082845.0140.0180.01.0SemiurbanY5000
LP002743FemaleNo0GraduateNo21380.099.0360.00.0SemiurbanN5000
LP002753FemaleNo1GraduateNo36520.095.0360.01.0SemiurbanY5000
LP002757FemaleYes0Not GraduateNo3017663.0102.0360.0NaNSemiurbanY5000
LP002768MaleNo0Not GraduateNo33580.080.036.01.0SemiurbanN5000
LP002776FemaleNo0GraduateNo50000.0103.0360.00.0SemiurbanN5000
LP002792MaleYes1GraduateNo54681032.026.0360.01.0SemiurbanY5000
LP002795MaleYes3+GraduateYes101390.0260.0360.01.0SemiurbanY5000
LP002798MaleYes0GraduateNo38872669.0162.0360.01.0SemiurbanY5000
LP002804FemaleYes0GraduateNo41802306.0182.0360.01.0SemiurbanY5000
LP002807MaleYes2Not GraduateNo3675242.0108.0360.01.0SemiurbanY5000
LP002813FemaleYes1GraduateYes194840.0600.0360.01.0SemiurbanY5000
LP002821MaleNo0Not GraduateYes58000.0132.0360.01.0SemiurbanY5000
LP002862MaleYes2Not GraduateNo61251625.0187.0480.01.0SemiurbanN5000
LP002863MaleYes3+GraduateNo64060.0150.0360.01.0SemiurbanN5000
LP002872MaleYes0GraduateNo30872210.0136.0360.00.0SemiurbanN5000
LP002892MaleYes2GraduateNo65400.0205.0360.01.0SemiurbanY5000
LP002894FemaleYes0GraduateNo31660.036.0360.01.0SemiurbanY5000
LP002917FemaleNo0Not GraduateNo21650.070.0360.01.0SemiurbanY5000
LP002925MaleNo0GraduateNo47500.094.0360.01.0SemiurbanY5000
LP002926MaleYes2GraduateYes27260.0106.0360.00.0SemiurbanN5000
LP002928MaleYes0GraduateNo30003416.056.0180.01.0SemiurbanY5000
LP002931MaleYes2GraduateYes60000.0205.0240.01.0SemiurbanN5000
LP002933MaleNo3+GraduateYes93570.0292.0360.01.0SemiurbanY5000
LP002943MaleNoNaNGraduateNo29870.088.0360.00.0SemiurbanN5000
LP002959FemaleYes1GraduateNo120000.0496.0360.01.0SemiurbanY5000
LP002961MaleYes1GraduateNo34002500.0173.0360.01.0SemiurbanY5000
LP002990FemaleNo0GraduateYes45830.0133.0360.00.0SemiurbanN5000

614 rows × 13 columns

# Row count per (Property_Area, rates) pair. aggfunc=len counts every row in
# the group, including rows whose Credit_History is NaN.
T1.pivot_table(values="Credit_History",index=["Property_Area","rates"],aggfunc=len)
                     Credit_History
Property_Area rates
Rural         1000            179.0
Semiurban     5000            233.0
Urban         12000           202.0
# Break the same groups down by Credit_History value. value_counts leaves
# NaN out by default, so these counts add up to fewer rows than the pivot
# table above them in the notebook output.
T1.groupby(['Property_Area','rates'])['Credit_History'].value_counts()
Property_Area  rates  Credit_History
Rural          1000   1.0               137
                      0.0                28
Semiurban      5000   1.0               187
                      0.0                30
Urban          12000  1.0               151
                      0.0                31
Name: Credit_History, dtype: int64
# Order the two income columns from highest to lowest ApplicantIncome, with
# CoapplicantIncome as the tie-breaker. sort_values returns a new frame, so
# nothing is modified in place on a slice of T and pandas has no reason to
# emit a SettingWithCopyWarning.
paixu = T[['ApplicantIncome', 'CoapplicantIncome']].sort_values(
    by=['ApplicantIncome', 'CoapplicantIncome'], ascending=False
)
paixu.head(10)
C:\Users\user\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
          ApplicantIncome  CoapplicantIncome
Loan_ID
LP002317            81000                0.0
LP002101            63337                0.0
LP001585            51763                0.0
LP001536            39999                0.0
LP001640            39147             4750.0
LP002422            37719                0.0
LP001637            33846                0.0
LP001448            23803                0.0
LP002624            20833             6667.0
LP001922            20667                0.0
# First 5 rows and first 2 columns of T, selected by position.
T2 = T.iloc[:5,:2]
T2
GenderMarried
Loan_ID
LP001002MaleNo
LP001003MaleYes
LP001005MaleYes
LP001006MaleYes
LP001008MaleNo
# First 8 rows and the columns at positions 2 and 3 of T, selected by position.
T3 = T.iloc[:8,2:4]
T3
DependentsEducation
Loan_ID
LP0010020Graduate
LP0010031Graduate
LP0010050Graduate
LP0010060Not Graduate
LP0010080Graduate
LP0010112Graduate
LP0010130Not Graduate
LP0010143+Graduate
# Merge the two tables on their index; the inner join keeps only the index
# labels present in both T2 and T3.
T2.merge(T3,how='inner',left_index=True,right_index=True)
GenderMarriedDependentsEducation
Loan_ID
LP001002MaleNo0Graduate
LP001003MaleYes1Graduate
LP001005MaleYes0Graduate
LP001006MaleYes0Not Graduate
LP001008MaleNo0Graduate
# Same inner join with the operands swapped: T3's columns now come first.
T3.merge(T2,how='inner',left_index=True,right_index=True)
DependentsEducationGenderMarried
Loan_ID
LP0010020GraduateMaleNo
LP0010031GraduateMaleYes
LP0010050GraduateMaleYes
LP0010060Not GraduateMaleYes
LP0010080GraduateMaleNo
# Right join: every index label of T3 is kept; T2's columns are NaN for
# labels that T2 does not contain.
T2.merge(T3,how='right',left_index=True,right_index=True)
GenderMarriedDependentsEducation
Loan_ID
LP001002MaleNo0Graduate
LP001003MaleYes1Graduate
LP001005MaleYes0Graduate
LP001006MaleYes0Not Graduate
LP001008MaleNo0Graduate
LP001011NaNNaN2Graduate
LP001013NaNNaN0Not Graduate
LP001014NaNNaN3+Graduate
# Outer join: the union of both indexes is kept, with NaN where a frame has
# no row for a label.
T2.merge(T3,how='outer',left_index=True,right_index=True)
GenderMarriedDependentsEducation
Loan_ID
LP001002MaleNo0Graduate
LP001003MaleYes1Graduate
LP001005MaleYes0Graduate
LP001006MaleYes0Not Graduate
LP001008MaleNo0Graduate
LP001011NaNNaN2Graduate
LP001013NaNNaN0Not Graduate
LP001014NaNNaN3+Graduate
# Stack T2 and T3 vertically (the default axis); columns that a frame lacks
# are filled with NaN for that frame's rows.
pd.concat([T2,T3])
DependentsEducationGenderMarried
Loan_ID
LP001002NaNNaNMaleNo
LP001003NaNNaNMaleYes
LP001005NaNNaNMaleYes
LP001006NaNNaNMaleYes
LP001008NaNNaNMaleNo
LP0010020GraduateNaNNaN
LP0010031GraduateNaNNaN
LP0010050GraduateNaNNaN
LP0010060Not GraduateNaNNaN
LP0010080GraduateNaNNaN
LP0010112GraduateNaNNaN
LP0010130Not GraduateNaNNaN
LP0010143+GraduateNaNNaN
# axis=0 spelled out explicitly; same row-wise stacking as the default call.
pd.concat([T2,T3],axis=0)
DependentsEducationGenderMarried
Loan_ID
LP001002NaNNaNMaleNo
LP001003NaNNaNMaleYes
LP001005NaNNaNMaleYes
LP001006NaNNaNMaleYes
LP001008NaNNaNMaleNo
LP0010020GraduateNaNNaN
LP0010031GraduateNaNNaN
LP0010050GraduateNaNNaN
LP0010060Not GraduateNaNNaN
LP0010080GraduateNaNNaN
LP0010112GraduateNaNNaN
LP0010130Not GraduateNaNNaN
LP0010143+GraduateNaNNaN
# Place T2 and T3 side by side, aligned on the index; labels missing from
# one frame get NaN in that frame's columns.
pd.concat([T2,T3],axis=1)
GenderMarriedDependentsEducation
LP001002MaleNo0Graduate
LP001003MaleYes1Graduate
LP001005MaleYes0Graduate
LP001006MaleYes0Not Graduate
LP001008MaleNo0Graduate
LP001011NaNNaN2Graduate
LP001013NaNNaN0Not Graduate
LP001014NaNNaN3+Graduate
# Side-by-side concatenation restricted to index labels present in both frames.
pd.concat([T2,T3],axis=1,join='inner')
GenderMarriedDependentsEducation
Loan_ID
LP001002MaleNo0Graduate
LP001003MaleYes1Graduate
LP001005MaleYes0Graduate
LP001006MaleYes0Not Graduate
LP001008MaleNo0Graduate
# Sort the whole frame in descending order of ApplicantIncome, then
# CoapplicantIncome; sort_values returns a new frame and leaves T untouched.
paixu = T.sort_values(['ApplicantIncome', 'CoapplicantIncome'], ascending=False)
# Show only the two income columns for the top 10 rows.
paixu[['ApplicantIncome','CoapplicantIncome']].head(10)
          ApplicantIncome  CoapplicantIncome
Loan_ID
LP002317            81000                0.0
LP002101            63337                0.0
LP001585            51763                0.0
LP001536            39999                0.0
LP001640            39147             4750.0
LP002422            37719                0.0
LP001637            33846                0.0
LP001448            23803                0.0
LP002624            20833             6667.0
LP001922            20667                0.0
# Bin LoanAmount into four left-closed ranges:
# [min, 90), [90, 140), [140, 190) and [190, +inf).
cut_points = [90, 140, 190]
break_points = [T['LoanAmount'].min()] + cut_points + [T['LoanAmount'].max()]
print(break_points)
labels = ['low', 'medium', 'high', 'very_high']
# With right=False every bin excludes its upper edge, so using the column
# maximum as the last edge would leave the largest loan unbinned (NaN).
# Keep the lower edges and make the top bin open-ended instead.
bin_edges = break_points[:-1] + [np.inf]
T['LoanAmount_Bin'] = pd.cut(T['LoanAmount'], bins=bin_edges, right=False, labels=labels)
[9.0, 90, 140, 190, 700.0]
# Count rows per bin, keeping the category order (sort=False). The Series
# method is used because the top-level pd.value_counts function is
# deprecated in newer pandas.
T['LoanAmount_Bin'].value_counts(sort=False)
low           98
medium       266
high         136
very_high     91
Name: LoanAmount_Bin, dtype: int64
# Encode Loan_Status as 0/1. map() is used instead of replace(): replacing
# strings with integers relies on implicit dtype downcasting, which newer
# pandas deprecates. Any value other than 'N'/'Y' would become NaN here.
T['Loan_Status_Coded'] = T['Loan_Status'].map({'N': 0, 'Y': 1})
# One indicator column per LoanAmount_Bin category, named LoanAmount_<label>.
pd.get_dummies(T['LoanAmount_Bin'], prefix='LoanAmount')
LoanAmount_lowLoanAmount_mediumLoanAmount_highLoanAmount_very_high
Loan_ID
LP0010020000
LP0010030100
LP0010051000
LP0010060100
LP0010080010
LP0010110001
LP0010130100
LP0010140010
LP0010180010
LP0010200001
LP0010241000
LP0010270100
LP0010280001
LP0010290100
LP0010301000
LP0010320100
LP0010340100
LP0010361000
LP0010380100
LP0010410100
LP0010430100
LP0010460001
LP0010470100
LP0010500100
LP0010520010
LP0010660001
LP0010680100
LP0010730100
LP0010861000
LP0010870100
...............
LP0029110010
LP0029120010
LP0029160100
LP0029171000
LP0029250100
LP0029260100
LP0029281000
LP0029310001
LP0029330001
LP0029360010
LP0029380001
LP0029400100
LP0029410010
LP0029431000
LP0029450010
LP0029480001
LP0029490001
LP0029500010
LP0029530100
LP0029580010
LP0029590001
LP0029600000
LP0029610010
LP0029640010
LP0029740100
LP0029781000
LP0029791000
LP0029830001
LP0029840010
LP0029900100

614 rows × 4 columns

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值