离散特征的处理

离散特征的处理

  1. 读取数据
  2. 找到所有离散特征
  3. 选择一个离散特征进行独热编码
  4. 采用循环对所有离散特征进行独热编码
  5. 结合昨天的内容,处理所有缺失值
# pandas is imported here so that this snippet runs on its own.
import pandas as pd

# Load the raw table from the Excel file.
data = pd.read_excel('data.xlsx')

# Collect the discrete features: every column whose dtype is 'object'.
discrete_lists = [
    column for column in data.columns if data[column].dtype == 'object'
]

# One-hot encode all discrete features in a single call.
# drop_first=True drops the first category level of each encoded feature.
data = pd.get_dummies(data, columns=discrete_lists, drop_first=True)
data.columns

 

Index(['Id', 'Annual Income', 'Tax Liens', 'Number of Open Accounts',
       'Years of Credit History', 'Maximum Open Credit',
       'Number of Credit Problems', 'Months since last delinquent',
       'Bankruptcies', 'Current Loan Amount', 'Current Credit Balance',
       'Monthly Debt', 'Credit Score', 'Credit Default',
       'Home Ownership_Home Mortgage', 'Home Ownership_Own Home',
       'Home Ownership_Rent', 'Years in current job_10+ years',
       'Years in current job_2 years', 'Years in current job_3 years',
       'Years in current job_4 years', 'Years in current job_5 years',
       'Years in current job_6 years', 'Years in current job_7 years',
       'Years in current job_8 years', 'Years in current job_9 years',
       'Years in current job_< 1 year', 'Purpose_buy a car',
       'Purpose_buy house', 'Purpose_debt consolidation',
       'Purpose_educational expenses', 'Purpose_home improvements',
       'Purpose_major purchase', 'Purpose_medical bills', 'Purpose_moving',
       'Purpose_other', 'Purpose_renewable energy', 'Purpose_small business',
       'Purpose_take a trip', 'Purpose_vacation', 'Purpose_wedding',
       'Term_Short Term'],
      dtype='object')

 

# Collect the feature names created by one-hot encoding: the columns that are
# present in the encoded frame but absent from a fresh read of the file.
data2 = pd.read_excel('data.xlsx')
list_final = [name for name in data.columns if name not in data2.columns]
list_final
['Home Ownership_Home Mortgage',
 'Home Ownership_Own Home',
 'Home Ownership_Rent',
 'Years in current job_10+ years',
 'Years in current job_2 years',
 'Years in current job_3 years',
 'Years in current job_4 years',
 'Years in current job_5 years',
 'Years in current job_6 years',
 'Years in current job_7 years',
 'Years in current job_8 years',
 'Years in current job_9 years',
 'Years in current job_< 1 year',
 'Purpose_buy a car',
 'Purpose_buy house',
 'Purpose_debt consolidation',
 'Purpose_educational expenses',
 'Purpose_home improvements',
 'Purpose_major purchase',
 'Purpose_medical bills',
 'Purpose_moving',
 'Purpose_other',
 'Purpose_renewable energy',
 'Purpose_small business',
 'Purpose_take a trip',
 'Purpose_vacation',
 'Purpose_wedding',
 'Term_Short Term']
# Cast every one-hot encoded column to int in one astype call,
# using a column -> dtype mapping.
data = data.astype(dict.fromkeys(list_final, int))
data.head()

 

	Id	Annual Income	Tax Liens	Number of Open Accounts	Years of Credit History	Maximum Open Credit	Number of Credit Problems	Months since last delinquent	Bankruptcies	Current Loan Amount	...	Purpose_major purchase	Purpose_medical bills	Purpose_moving	Purpose_other	Purpose_renewable energy	Purpose_small business	Purpose_take a trip	Purpose_vacation	Purpose_wedding	Term_Short Term
0	0	482087.0	0	11	26.3	685960	1	NaN	1.0	99999999	...	0	0	0	0	0	0	0	0	0	1
1	1	1025487.0	0	15	15.3	1181730	0	NaN	0.0	264968	...	0	0	0	0	0	0	0	0	0	0
2	2	751412.0	0	11	35.0	1182434	0	NaN	0.0	99999999	...	0	0	0	0	0	0	0	0	0	1
3	3	805068.0	0	8	22.5	147400	1	NaN	1.0	121396	...	0	0	0	0	0	0	0	0	0	1
4	4	776264.0	0	13	13.6	385836	1	NaN	0.0	125840	...	0	0	0	0	0	0	0	0	0	1
5 rows × 42 columns
# Count the missing values in each column.
data.isna().sum()
Id                                   0
Annual Income                     1557
Tax Liens                            0
Number of Open Accounts              0
Years of Credit History              0
Maximum Open Credit                  0
Number of Credit Problems            0
Months since last delinquent      4081
Bankruptcies                        14
Current Loan Amount                  0
Current Credit Balance               0
Monthly Debt                         0
Credit Score                      1557
Credit Default                       0
Home Ownership_Home Mortgage         0
Home Ownership_Own Home              0
Home Ownership_Rent                  0
Years in current job_10+ years       0
Years in current job_2 years         0
Years in current job_3 years         0
Years in current job_4 years         0
Years in current job_5 years         0
Years in current job_6 years         0
Years in current job_7 years         0
Years in current job_8 years         0
Years in current job_9 years         0
Years in current job_< 1 year        0
Purpose_buy a car                    0
Purpose_buy house                    0
Purpose_debt consolidation           0
Purpose_educational expenses         0
Purpose_home improvements            0
Purpose_major purchase               0
Purpose_medical bills                0
Purpose_moving                       0
Purpose_other                        0
Purpose_renewable energy             0
Purpose_small business               0
Purpose_take a trip                  0
Purpose_vacation                     0
Purpose_wedding                      0
Term_Short Term                      0
dtype: int64

 

# Fill missing values with the column mean.
# The filled column is assigned back to `data` rather than calling
# fillna(..., inplace=True) on the `data[col]` selection: that chained in-place
# form triggers a pandas warning and, with Copy-on-Write enabled, leaves `data`
# unchanged.
for col in data.columns:
    if data[col].isnull().any():
        data[col] = data[col].fillna(data[col].mean())
data.isnull().sum()
Id                                0
Annual Income                     0
Tax Liens                         0
Number of Open Accounts           0
Years of Credit History           0
Maximum Open Credit               0
Number of Credit Problems         0
Months since last delinquent      0
Bankruptcies                      0
Current Loan Amount               0
Current Credit Balance            0
Monthly Debt                      0
Credit Score                      0
Credit Default                    0
Home Ownership_Home Mortgage      0
Home Ownership_Own Home           0
Home Ownership_Rent               0
Years in current job_10+ years    0
Years in current job_2 years      0
Years in current job_3 years      0
Years in current job_4 years      0
Years in current job_5 years      0
Years in current job_6 years      0
Years in current job_7 years      0
Years in current job_8 years      0
Years in current job_9 years      0
Years in current job_< 1 year     0
Purpose_buy a car                 0
Purpose_buy house                 0
Purpose_debt consolidation        0
Purpose_educational expenses      0
Purpose_home improvements         0
Purpose_major purchase            0
Purpose_medical bills             0
Purpose_moving                    0
Purpose_other                     0
Purpose_renewable energy          0
Purpose_small business            0
Purpose_take a trip               0
Purpose_vacation                  0
Purpose_wedding                   0
Term_Short Term                   0
dtype: int64

 

 @浙大疏锦行

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值