金融风控(四)建模与调参

简单建模

from sklearn.model_selection import KFold
# Columns used as model inputs, kept in the original order.
predictors = [
    'loanAmnt', 'term', 'interestRate', 'installment',
    'grade', 'subGrade',
    'employmentTitle', 'employmentLength', 'homeOwnership',
    'annualIncome', 'verificationStatus',
    'purpose', 'postCode', 'regionCode', 'dti', 'delinquency_2years',
    'ficoRangeLow', 'ficoRangeHigh', 'openAcc', 'pubRec',
    'pubRecBankruptcies', 'revolBal', 'revolUtil', 'totalAcc',
    'initialListStatus', 'applicationType', 'earliesCreditLine', 'title',
    'policyCode',
    # Numbered columns n0 .. n14.
    *['n{}'.format(idx) for idx in range(15)],
    'issueDateDT',
]

# Feature matrices for training / prediction and the binary target column.
# Assumes train_data / test_data are pandas DataFrames loaded earlier -- TODO confirm.
y_train = train_data["isDefault"]
X_train = train_data[predictors]
X_test = test_data[predictors]

# Shuffled K-fold splitter with a fixed seed so the fold assignment is repeatable.
folds = 5
seed = 2020
kf = KFold(
    n_splits=folds,
    shuffle=True,
    random_state=seed,
)

使用 LightGBM 进行建模

from sklearn.model_selection import train_test_split
import lightgbm as lgb
# Hold out 20% of the training rows as a validation set.
# random_state is pinned to the script-wide `seed` so the split (and therefore
# the reported validation AUC) is reproducible between runs.
X_train_split, X_val, y_train_split, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=seed
)

# Wrap both parts in LightGBM Dataset objects for lgb.train.
train_matrix = lgb.Dataset(X_train_split, y_train_split)
valid_matrix = lgb.Dataset(X_val, y_val)

# Baseline LightGBM configuration: binary objective evaluated with AUC,
# no regularisation and no feature/row subsampling.
params = dict(
    boosting_type='gbdt',
    objective='binary',
    learning_rate=0.1,
    metric='auc',
    min_child_weight=1e-3,
    num_leaves=31,
    max_depth=-1,
    reg_lambda=0,
    reg_alpha=0,
    feature_fraction=1,
    bagging_fraction=1,
    bagging_freq=0,
    seed=2020,
    nthread=8,
    silent=True,
    verbose=-1,
)

# Train for at most 20000 rounds, evaluating on the validation set, logging
# every 1000 rounds and stopping early after 200 rounds without improvement.
# NOTE(review): the `verbose_eval` / `early_stopping_rounds` keyword arguments
# were removed from lgb.train in LightGBM 4.0 -- confirm the installed version.
model = lgb.train(
    params,
    train_set=train_matrix,
    valid_sets=valid_matrix,
    num_boost_round=20000,
    verbose_eval=1000,
    early_stopping_rounds=200,
)

一级标题

// A code block
var foo = 'bar';

roc_auc评分

from sklearn import metrics
from sklearn.metrics import roc_auc_score
# Score the validation set with the best iteration found by early stopping.
val_pre_lag = model.predict(X_val, num_iteration=model.best_iteration)

# ROC curve points and the area under that curve.
fpr, tpr, threshold = metrics.roc_curve(y_val, val_pre_lag)
roc_auc = metrics.auc(fpr, tpr)

# Blue solid line: model ROC curve; red dashed diagonal: chance level.
plt.plot(fpr, tpr, color="b", label="VAL AUC=%0.4f" % roc_auc)
plt.plot((0, 1), (0, 1), color="r", linestyle="--")
plt.legend(loc="best")
plt.show()

使用5折交叉验证进行模型性能评估

# 5-fold cross-validation of the baseline LightGBM model.
#
# Each fold trains on the rows selected by `train_index` and is scored (AUC)
# on the rows selected by `valid_index`, so every training row is used for
# validation exactly once. The previous version ignored the fold indices and
# drew a new random 80/20 split per iteration, which is not K-fold CV.

# The hyper-parameters are identical for every fold, so build them once.
params = {
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'learning_rate': 0.1,
    'metric': 'auc',
    'min_child_weight': 1e-3,
    'num_leaves': 31,
    'max_depth': -1,
    'reg_lambda': 0,
    'reg_alpha': 0,
    'feature_fraction': 1,
    'bagging_fraction': 1,
    'bagging_freq': 0,
    'seed': 2020,
    'nthread': 8,
    'silent': True,
    'verbose': -1,
}

cv_scores = []
for i, (train_index, valid_index) in enumerate(kf.split(X_train, y_train)):
    print('************************************ {} ************************************'.format(str(i+1)))

    # kf.split yields positional indices, hence .iloc.
    # Assumes X_train / y_train are a pandas DataFrame / Series -- TODO confirm.
    X_train_split, y_train_split = X_train.iloc[train_index], y_train.iloc[train_index]
    X_val, y_val = X_train.iloc[valid_index], y_train.iloc[valid_index]

    train_matrix = lgb.Dataset(X_train_split, y_train_split)
    valid_matrix = lgb.Dataset(X_val, y_val)

    # Up to 20000 rounds, early stopping after 200 rounds without improvement
    # on this fold's validation set.
    model = lgb.train(
        params,
        train_set=train_matrix,
        valid_sets=valid_matrix,
        num_boost_round=20000,
        verbose_eval=1000,
        early_stopping_rounds=200,
    )

    # Score the held-out fold with the best iteration.
    val_pre_lag = model.predict(X_val, num_iteration=model.best_iteration)
    cv_scores.append(roc_auc_score(y_val, val_pre_lag))
    print(cv_scores)

# Per-fold AUC values plus their mean and standard deviation.
print("lgb_score_list:{}".format(cv_scores))
print("lgb_score_mean:{}".format(np.mean(cv_scores)))
print("lgb_score_std:{}".format(np.std(cv_scores)))
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值