(十三)GBDT模型用于评分卡模型的Python实现
前一篇博客已经介绍了GBDT模型用于评分卡模型的原理(理论部分),链接如下:
https://blog.csdn.net/LuYi_WeiLin/article/details/88314746
这篇博客附上GBDT模型用于评分卡模型的Python实现代码。
(之前已经有一篇运用逻辑回归实现申请评分卡的文章:https://blog.csdn.net/LuYi_WeiLin/article/details/85060190 )
数据集可以在我的资源页下载,与逻辑回归实现申请评分卡所用的数据集相同。
代码如下:
import pandas as pd
import time
import numpy as np
import re
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import cross_validation, metrics
from sklearn.model_selection import GridSearchCV, train_test_split
import matplotlib.pylab as plt
import datetime
from dateutil.relativedelta import relativedelta
from numpy import log
from sklearn.metrics import roc_auc_score
from sklearn.feature_extraction import DictVectorizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model.logistic import LogisticRegression
'''
时间:20190311
作者:小象学院
'''
def CareerYear(x):
    """Convert an employment-length label into an integer number of years.

    The checks are applied to ``str(x)`` in this order:

    * contains ``'nan'``  -> ``-1`` (used as the missing-value code)
    * contains ``'10+'``  -> ``11`` (e.g. ``"10+ years"``)
    * contains ``'< 1'``  -> ``0``  (e.g. ``"< 1 year"``)
    * otherwise every non-digit character is removed from ``x`` and the
      remaining digits are parsed as an integer (e.g. ``"3 years"`` -> 3).

    Args:
        x: The raw label; a string, or a NaN float for missing values.

    Returns:
        The converted number of years as an ``int``.

    Raises:
        ValueError: If the fallback branch finds no digits in ``x``.
        TypeError: If ``x`` is not a string and reaches the fallback branch.
    """
    label = str(x)
    if 'nan' in label:
        return -1
    if '10+' in label:
        return 11
    if '< 1' in label:
        return 0
    # Raw string: "\D" inside a plain literal is an invalid escape sequence
    # and triggers a SyntaxWarning on recent Python versions.
    return int(re.sub(r"\D", "", x))
def DescExisting(x):
    """Collapse the ``desc`` variable into two categories.

    Args:
        x: The raw ``desc`` value.

    Returns:
        ``'no desc'`` when ``x`` is a float, ``'desc'`` for anything else.
    """
    # NOTE(review): a float is presumably how a missing description shows up
    # (NaN) after loading the data -- confirm against the loader.
    # isinstance() also matches float subclasses such as numpy.float64, which
    # a comparison on the type name 'float' would wrongly label as 'desc'.
    return 'no desc' if isinstance(x, float) else 'desc'
# Three-letter English month abbreviation -> month number. Built once at
# import time instead of on every call.
_MONTH_NUMBER = {
    'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
    'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12,
}


def ConvertDateStr(x, pivot=17):
    """Convert a ``'Mon-YY'`` string (e.g. ``'Dec-11'``) into a datetime.

    Args:
        x: The date label. Characters ``0-2`` are the month abbreviation and
            characters ``4-5`` the two-digit year. A value whose ``str()`` is
            ``'nan'`` is treated as missing.
        pivot: Two-digit years ``<= pivot`` are mapped to 20YY, larger ones
            to 19YY. Defaults to 17, the cut-off originally hard-coded here.

    Returns:
        ``datetime.datetime`` for the first day of that month, or the
        sentinel ``datetime.datetime(9900, 1, 1)`` for a missing value.

    Raises:
        KeyError: If the month abbreviation is not recognised.
        ValueError: If the year characters are not digits.
    """
    if str(x) == 'nan':
        # Far-future sentinel for missing dates. It is built directly rather
        # than via time.mktime()/fromtimestamp(), whose supported range is
        # platform dependent and may not reach the year 9900.
        return datetime.datetime(9900, 1, 1)
    yr = int(x[4:6])
    yr += 2000 if yr <= pivot else 1900
    return datetime.datetime(yr, _MONTH_NUMBER[x[:3]], 1)
def MonthGap(earlyDate, lateDate):
if lateDate > earlyDate:
gap = relativedelta(lateDate,earlyDate)
yr = gap.years
mth = gap.months
re