2020-09-27

Task: 模型融合 (model fusion)

import pandas as pd
import numpy as np
from sklearn.ensemble import  RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import roc_auc_score
import lightgbm as lgb
import joblib

# Folder that holds the train/test tables.
path=r'C:\Users\Ilikeyou Isomeone\Desktop\E-learn\Datawhale\Risk_management\Rawdata'

# The files carry an .xls extension but are parsed here as tab-separated text:
# first row is the header, first column becomes the index.
train=pd.read_table(path+'\\train_result.xls',header=0,index_col=0,sep='\t')
test=pd.read_table(path+'\\test_result.xls',header=0,index_col=0,sep='\t')
#test.drop('isDefault',axis=1,inplace=True)


# Split the labelled table into the target column and the feature matrix.
Y_train=train['isDefault']
X_train=train.drop('isDefault',axis=1)

# Hold out 20% of the labelled rows for validation.  The split is seeded (with
# the same 2020 used as the LightGBM seed further down) so that validation
# scores are reproducible and comparable between runs and between models.
X_train_train,X_train_test,Y_train_train,Y_train_test=train_test_split(
    X_train,Y_train,test_size=0.2,random_state=2020)

# Random Forest
# Fit on the training split only: X_train_test is carved out of X_train, so
# fitting on all of X_train would let the model see the validation rows and
# inflate the score reported below.
clf_RF = RandomForestClassifier()
clf_RF.fit(X_train_train, Y_train_train)

# ROC AUC on the hold-out split.
# Rank rows by the predicted probability of classes_[1] instead of hard class
# labels; hard labels collapse the ROC curve to a single operating point.
# (Assumes isDefault is a binary 0/1 label, so column 1 is the "default" class.)
Y_train_test_pre = clf_RF.predict_proba(X_train_test)[:, 1]
fpr, tpr, threshold = metrics.roc_curve(Y_train_test, Y_train_test_pre)
# metrics.auc integrates the (fpr, tpr) curve; roc_auc_score expects
# (y_true, y_score) and must not be fed the curve points.
roc_auc = metrics.auc(fpr, tpr)
print('验证集随机森林roc_auc: ', roc_auc)

# Refit on every labelled row before persisting, so the saved model is still
# trained on the full data set.
clf_RF.fit(X_train, Y_train)
# NOTE: no path separator is inserted between `path` and the file name, so the
# file is written next to (not inside) the Rawdata folder.  The code that loads
# the model builds the name the same way; change both together if at all.
joblib.dump(clf_RF, path+'clf_RF.model')

# LightGBM
# Wrap the training and hold-out splits as LightGBM datasets.
train_lgb = lgb.Dataset(X_train_train, label=Y_train_train)
valid_lgb = lgb.Dataset(X_train_test, label=Y_train_test)

# Binary-classification GBDT evaluated with AUC; regularisation and
# row/feature subsampling are switched off.
params = {
            'boosting_type': 'gbdt',
            'objective': 'binary',
            'learning_rate': 0.1,
            'metric': 'auc',
            'min_child_weight': 1e-3,
            'num_leaves': 31,
            'max_depth': -1,
            'reg_lambda': 0,
            'reg_alpha': 0,
            'feature_fraction': 1,
            'bagging_fraction': 1,
            'bagging_freq': 0,
            'seed': 2020,
            'nthread': 8,
            'silent': True,
            'verbose': -1,
}

# Train for at most 20000 rounds, stopping early once the validation metric
# has not improved for 200 rounds; progress is logged every 1000 rounds.
# NOTE(review): LightGBM 4.x drops the `verbose_eval` and
# `early_stopping_rounds` keyword arguments in favour of callbacks -- confirm
# against the installed version.
model = lgb.train(params, train_set=train_lgb, valid_sets=valid_lgb,
                  num_boost_round=20000, verbose_eval=1000, early_stopping_rounds=200)

# Validation ROC AUC from the predicted scores for the hold-out rows.
Y_train_test_pre = model.predict(X_train_test)
fpr, tpr, threshold = metrics.roc_curve(Y_train_test.values, Y_train_test_pre, pos_label=1.0)
# Integrate the ROC curve directly.  Comparing fpr against the thresholds and
# feeding the result to roc_auc_score does not yield an AUC: that function
# expects (true labels, scores), not points of the curve.
roc_auc = metrics.auc(fpr, tpr)
print("lightgbm's roc_auc: ", roc_auc)

# NOTE: no path separator is inserted between `path` and the file name, so the
# file is written next to (not inside) the Rawdata folder.
joblib.dump(model, path+'lightgbm.model')

import pandas as pd
import numpy as np
import sklearn
import joblib
from sklearn.metrics import roc_auc_score

# Scoring step: read the test table, align its columns, load the persisted
# random forest and print its predictions.
path = r'C:\Users\Ilikeyou Isomeone\Desktop\E-learn\Datawhale\Risk_management\Rawdata'

# Tab-separated text despite the .xls extension; first column is the index.
test = pd.read_csv(f'{path}\\test_result.xls', sep='\t', header=0, index_col=0)

# DataFrame.info() writes its summary itself and returns None, so this also
# prints a trailing "None".
print(test.info())

# Drop the label column and add an all-zero 'purpose_13' column.
# NOTE(review): presumably 'purpose_13' is a feature column the model was
# trained with that is absent from this table -- confirm, and verify that the
# resulting column order matches the training matrix.
test = test.drop(columns='isDefault').assign(purpose_13=0)

# The model file name is built without a path separator, matching how the
# training code saved it.
clf_RF = joblib.load(f'{path}clf_RF.model')

y_test = clf_RF.predict(test)
print(y_test)
print(len(y_test))

©️2020 CSDN 皮肤主题: 数字20 设计师:CSDN官方博客 返回首页