任务
预测贷款客户是否会逾期,status为响应变量,有0和1两种值,0表示未逾期,1表示逾期。
代码:
# -*- coding: utf-8 -*-
"""
Created on Thu Nov 15 13:02:11 2018

@author: keepi
"""
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

# Allow up to 1000 rows to be shown when a DataFrame is printed.
# The full option key is spelled out instead of relying on pattern matching.
pd.set_option('display.max_rows', 1000)

# Load the data.
data = pd.read_csv('data.csv', encoding='gb18030')
# Replace every missing value with the constant 10. fillna() already returns
# a DataFrame, so no extra pd.DataFrame(...) wrapper is needed.
data = data.fillna(10)

# Feature engineering.
# Disabled alternative: integer-encode 'reg_preference_for_trad' by numbering
# its distinct values. The codes follow set iteration order.
# n = set(data['reg_preference_for_trad'])
# dic = {}
# for i, j in enumerate(n):
#     dic[j] = i
# data['reg_preference_for_trad'] = data['reg_preference_for_trad'].map(dic)
x_dummy = pd.get_dummies(data['