任务
根据客户贷款数据预测客户是否会逾期,1表示会,0表示不会。
实现
# -*- coding: utf-8 -*-
"""
Created on Thu Nov 15 13:02:11 2018
@author: keepi
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
import xgboost as xgb
import warnings
# Suppress all warnings for the remainder of the script.
warnings.filterwarnings('ignore')
# Allow pandas to display up to 1000 rows so the per-column report printed
# below is not truncated. The full option name is spelled out: abbreviated
# names are only resolved by partial matching, which can break if pandas
# later adds another option with a similar name.
pd.set_option('display.max_rows', 1000)

# Load the data (the CSV is read with the gb18030 encoding).
data = pd.read_csv('data.csv', encoding='gb18030')
print("data.shape:", data.shape)

# Data processing
# Fraction of missing values per column, reported from highest to lowest.
miss_rate = data.isnull().sum() / len(data)
print("缺失率:", miss_rate.sort_values(ascending=False))

# Keep only the numeric columns, as an independent copy so that the in-place
# fill below does not touch `data`, and replace each column's missing values
# with that column's mean.
# NOTE(review): the means are computed over the whole dataset; if a
# train/test split follows (train_test_split is imported), consider fitting
# the imputation on the training portion only — confirm against the rest of
# the script.
X_num = data.select_dtypes('number').copy()
X_num.fillna(X_num.mean(), inplace=True)
print("数值型特征的shape:", X_num.shape)
print(X_num.columns)
X_num.drop(['Unnamed: 0','status'],axi