Untitled1

import pandas as pd
# Load the loan training data and preview the first ten rows.
df = pd.read_csv("../Desktop/train.csv")
df.head(n=10)
Loan_IDGenderMarriedDependentsEducationSelf_EmployedApplicantIncomeCoapplicantIncomeLoanAmountLoan_Amount_TermCredit_HistoryProperty_AreaLoan_Status
0LP001002MaleNo0GraduateNo58490.0NaN360.01.0UrbanY
1LP001003MaleYes1GraduateNo45831508.0128.0360.01.0RuralN
2LP001005MaleYes0GraduateYes30000.066.0360.01.0UrbanY
3LP001006MaleYes0Not GraduateNo25832358.0120.0360.01.0UrbanY
4LP001008MaleNo0GraduateNo60000.0141.0360.01.0UrbanY
5LP001011MaleYes2GraduateYes54174196.0267.0360.01.0UrbanY
6LP001013MaleYes0Not GraduateNo23331516.095.0360.01.0UrbanY
7LP001014MaleYes3+GraduateNo30362504.0158.0360.00.0SemiurbanN
8LP001018MaleYes2GraduateNo40061526.0168.0360.01.0UrbanY
9LP001020MaleYes1GraduateNo1284110968.0349.0360.01.0SemiurbanN
# Keep only female, non-graduate applicants whose loan was approved,
# restricted to the three columns used in the filter.
df1 = df.loc[
    (df['Education'] == 'Not Graduate')
    & (df['Loan_Status'] == 'Y')
    & (df['Gender'] == 'Female'),
    ['Gender', 'Education', 'Loan_Status'],
]
df1.head()
GenderEducationLoan_Status
50FemaleNot GraduateY
197FemaleNot GraduateY
205FemaleNot GraduateY
279FemaleNot GraduateY
403FemaleNot GraduateY
# Add a column that duplicates Loan_Status, then display the frame.
df['new_col'] = df['Loan_Status']
df
Loan_IDGenderMarriedDependentsEducationSelf_EmployedApplicantIncomeCoapplicantIncomeLoanAmountLoan_Amount_TermCredit_HistoryProperty_AreaLoan_Statusnew_col
0LP001002MaleNo0GraduateNo58490.0NaN360.01.0UrbanYY
1LP001003MaleYes1GraduateNo45831508.0128.0360.01.0RuralNN
2LP001005MaleYes0GraduateYes30000.066.0360.01.0UrbanYY
3LP001006MaleYes0Not GraduateNo25832358.0120.0360.01.0UrbanYY
4LP001008MaleNo0GraduateNo60000.0141.0360.01.0UrbanYY
5LP001011MaleYes2GraduateYes54174196.0267.0360.01.0UrbanYY
6LP001013MaleYes0Not GraduateNo23331516.095.0360.01.0UrbanYY
7LP001014MaleYes3+GraduateNo30362504.0158.0360.00.0SemiurbanNN
8LP001018MaleYes2GraduateNo40061526.0168.0360.01.0UrbanYY
9LP001020MaleYes1GraduateNo1284110968.0349.0360.01.0SemiurbanNN
10LP001024MaleYes2GraduateNo3200700.070.0360.01.0UrbanYY
11LP001027MaleYes2GraduateNaN25001840.0109.0360.01.0UrbanYY
12LP001028MaleYes2GraduateNo30738106.0200.0360.01.0UrbanYY
13LP001029MaleNo0GraduateNo18532840.0114.0360.01.0RuralNN
14LP001030MaleYes2GraduateNo12991086.017.0120.01.0UrbanYY
15LP001032MaleNo0GraduateNo49500.0125.0360.01.0UrbanYY
16LP001034MaleNo1Not GraduateNo35960.0100.0240.0NaNUrbanYY
17LP001036FemaleNo0GraduateNo35100.076.0360.00.0UrbanNN
18LP001038MaleYes0Not GraduateNo48870.0133.0360.01.0RuralNN
19LP001041MaleYes0GraduateNaN26003500.0115.0NaN1.0UrbanYY
20LP001043MaleYes0Not GraduateNo76600.0104.0360.00.0UrbanNN
21LP001046MaleYes1GraduateNo59555625.0315.0360.01.0UrbanYY
22LP001047MaleYes0Not GraduateNo26001911.0116.0360.00.0SemiurbanNN
23LP001050NaNYes2Not GraduateNo33651917.0112.0360.00.0RuralNN
24LP001052MaleYes1GraduateNaN37172925.0151.0360.0NaNSemiurbanNN
25LP001066MaleYes0GraduateYes95600.0191.0360.01.0SemiurbanYY
26LP001068MaleYes0GraduateNo27992253.0122.0360.01.0SemiurbanYY
27LP001073MaleYes2Not GraduateNo42261040.0110.0360.01.0UrbanYY
28LP001086MaleNo0Not GraduateNo14420.035.0360.01.0UrbanNN
29LP001087FemaleNo2GraduateNaN37502083.0120.0360.01.0SemiurbanYY
.............................................
584LP002911MaleYes1GraduateNo27871917.0146.0360.00.0RuralNN
585LP002912MaleYes1GraduateNo42833000.0172.084.01.0RuralNN
586LP002916MaleYes0GraduateNo22971522.0104.0360.01.0UrbanYY
587LP002917FemaleNo0Not GraduateNo21650.070.0360.01.0SemiurbanYY
588LP002925NaNNo0GraduateNo47500.094.0360.01.0SemiurbanYY
589LP002926MaleYes2GraduateYes27260.0106.0360.00.0SemiurbanNN
590LP002928MaleYes0GraduateNo30003416.056.0180.01.0SemiurbanYY
591LP002931MaleYes2GraduateYes60000.0205.0240.01.0SemiurbanNN
592LP002933NaNNo3+GraduateYes93570.0292.0360.01.0SemiurbanYY
593LP002936MaleYes0GraduateNo38593300.0142.0180.01.0RuralYY
594LP002938MaleYes0GraduateYes161200.0260.0360.01.0UrbanYY
595LP002940MaleNo0Not GraduateNo38330.0110.0360.01.0RuralYY
596LP002941MaleYes2Not GraduateYes63831000.0187.0360.01.0RuralNN
597LP002943MaleNoNaNGraduateNo29870.088.0360.00.0SemiurbanNN
598LP002945MaleYes0GraduateYes99630.0180.0360.01.0RuralYY
599LP002948MaleYes2GraduateNo57800.0192.0360.01.0UrbanYY
600LP002949FemaleNo3+GraduateNaN41641667.0350.0180.0NaNUrbanNN
601LP002950MaleYes0Not GraduateNaN28942792.0155.0360.01.0RuralYY
602LP002953MaleYes3+GraduateNo57030.0128.0360.01.0UrbanYY
603LP002958MaleNo0GraduateNo36764301.0172.0360.01.0RuralYY
604LP002959FemaleYes1GraduateNo120000.0496.0360.01.0SemiurbanYY
605LP002960MaleYes0Not GraduateNo24003800.0NaN180.01.0UrbanNN
606LP002961MaleYes1GraduateNo34002500.0173.0360.01.0SemiurbanYY
607LP002964MaleYes2Not GraduateNo39871411.0157.0360.01.0RuralYY
608LP002974MaleYes0GraduateNo32321950.0108.0360.01.0RuralYY
609LP002978FemaleNo0GraduateNo29000.071.0360.01.0RuralYY
610LP002979MaleYes3+GraduateNo41060.040.0180.01.0RuralYY
611LP002983MaleYes1GraduateNo8072240.0253.0360.01.0UrbanYY
612LP002984MaleYes2GraduateNo75830.0187.0360.01.0UrbanYY
613LP002990FemaleNo0GraduateYes45830.0133.0360.00.0SemiurbanNN

614 rows × 14 columns

# Boolean mask: True where the applicant is not a graduate.
df['Education'].eq('Not Graduate')
0      False
1      False
2      False
3       True
4      False
5      False
6       True
7      False
8      False
9      False
10     False
11     False
12     False
13     False
14     False
15     False
16      True
17     False
18      True
19     False
20      True
21     False
22      True
23      True
24     False
25     False
26     False
27      True
28      True
29     False
       ...  
584    False
585    False
586    False
587     True
588    False
589    False
590    False
591    False
592    False
593    False
594    False
595     True
596     True
597    False
598    False
599    False
600    False
601     True
602    False
603    False
604    False
605     True
606    False
607     True
608    False
609    False
610    False
611    False
612    False
613    False
Name: Education, Length: 614, dtype: bool
def num_missing(x) -> int:
    """Return the number of missing (NaN/None) entries in the Series *x*.

    Intended for use with ``DataFrame.apply`` along either axis, where *x*
    is one column or one row.
    """
    # Vectorized count instead of the builtin sum(), which would iterate the
    # Series element by element in Python. int() keeps the plain-int return.
    return int(x.isnull().sum())
# Count missing Gender values with the vectorized Series.sum() rather than
# the builtin sum(), which loops over every element in Python.
int(df['Gender'].isnull().sum())
13
# Missing-value count for each column (the function receives one column at a time).
df.apply(num_missing, axis="index")
Loan_ID               0
Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
new_col               0
dtype: int64
# Missing-value count for each row (the function receives one row at a time).
df.apply(num_missing, axis="columns")
0      1
1      0
2      0
3      0
4      0
5      0
6      0
7      0
8      0
9      0
10     0
11     1
12     0
13     0
14     0
15     0
16     1
17     0
18     0
19     2
20     0
21     0
22     0
23     1
24     2
25     0
26     0
27     0
28     0
29     1
      ..
584    0
585    0
586    0
587    0
588    1
589    0
590    0
591    0
592    1
593    0
594    0
595    0
596    0
597    1
598    0
599    0
600    2
601    1
602    0
603    0
604    0
605    1
606    0
607    0
608    0
609    0
610    0
611    0
612    0
613    0
Length: 614, dtype: int64
# Store the per-row missing-value counts and preview the first ten.
df2 = df.apply(num_missing, axis="columns")
df2.head(n=10)
0    1
1    0
2    0
3    0
4    0
5    0
6    0
7    0
8    0
9    0
dtype: int64
# Most frequent Gender value; mode() returns a Series, so take its first entry.
df['Gender'].mode().iat[0]
'Male'
# Fill missing Gender values with the most frequent value. The result is
# assigned back to the column: calling fillna(..., inplace=True) on the
# df['Gender'] selection operates on an intermediate object, which raises a
# FutureWarning in pandas 2.x and leaves df unchanged under Copy-on-Write.
df['Gender'] = df['Gender'].fillna(df['Gender'].mode().iloc[0])
# Confirm that no missing Gender values remain.
int(df['Gender'].isnull().sum())
0
import numpy as np
# Mean LoanAmount for every (Gender, Married, Self_Employed) combination.
# The aggregation is named by string: passing the np.mean callable is
# deprecated in recent pandas releases.
df.pivot_table(
    values=["LoanAmount"],
    index=["Gender", "Married", "Self_Employed"],
    aggfunc="mean",
)
LoanAmount
GenderMarriedSelf_Employed
FemaleNoNo110.596774
Yes125.800000
YesNo135.480000
Yes282.250000
MaleNoNo127.500000
Yes180.588235
YesNo153.982699
Yes169.395833
# Impute each missing LoanAmount with the mean LoanAmount of the row's
# (Gender, Married, Self_Employed) group.
group_cols = ["Gender", "Married", "Self_Employed"]
impute_grps = df.pivot_table(values=["LoanAmount"], index=group_cols, aggfunc="mean")
for i, row in df.loc[df["LoanAmount"].isnull(), :].iterrows():
    # Rows whose grouping key is itself missing have no group mean to look
    # up; leave their LoanAmount untouched instead of raising KeyError.
    if row[group_cols].isnull().any():
        continue
    ind = tuple(row[group_cols])
    if ind not in impute_grps.index:
        continue
    # Square-bracket .loc lookup: row key is the group tuple, column is LoanAmount.
    df.loc[i, "LoanAmount"] = impute_grps.loc[ind, "LoanAmount"]
# Group applicants by gender, marital status and self-employment, then show
# the mean LoanAmount of each group.
grps = df.groupby(by=["Gender", "Married", "Self_Employed"])
grps["LoanAmount"].agg("mean")
Gender  Married  Self_Employed
Female  No       No               110.596774
                 Yes              125.800000
        Yes      No               135.480000
                 Yes              282.250000
Male    No       No               127.500000
                 Yes              180.588235
        Yes      No               153.982699
                 Yes              169.395833
Name: LoanAmount, dtype: float64
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值