# 作用:Pipeline 用于将多个数据处理步骤和机器学习模型链接在一起。为了简化构建变换和模型链的过程,Scikit-Learn 提供了 Pipeline 类,可以将多个处理步骤合并为单个 Scikit-Learn 估计器。Pipeline 类本身具有 fit、predict 和 score 方法,其行为与 Scikit-Learn 中的其他模型相同。
# Model initialisation and randomized hyperparameter search (RandomizedSearchCV)
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split,RandomizedSearchCV
# Base classifier for the SMOTE pipeline.
# The search space below contains both 'l1' and 'l2' penalties. The default
# 'lbfgs' solver only supports 'l2', so every sampled 'l1' candidate would
# fail to fit; 'liblinear' supports both penalties.
log_reg_sm = LogisticRegression(solver='liblinear')

# Hyperparameter space: regularisation type and inverse regularisation strength.
log_reg_params = {
    'penalty': ['l1', 'l2'],
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
}

# Randomized search: only n_iter=4 parameter combinations are sampled from the
# 12 possible ones, instead of trying them all as a grid search would.
rand_log_reg = RandomizedSearchCV(log_reg_sm, log_reg_params, n_iter=4)
# Collect evaluation metrics for the classifier over the cross-validation folds.
# NOTE(review): accuracy_lst, precision_lst, recall_lst, f1_lst and auc_lst are
# only appended to here; they are assumed to be lists created earlier - confirm.
#
# SMOTE `sampling_strategy` options:
#   "minority"     - resample only the minority class
#   "not minority" - resample every class except the minority class
#   "not majority" - resample every class except the majority class
#   "all"          - resample all classes
#
# The pipeline definition does not depend on the fold, so it is built once.
# Each fit() call below refits it (SMOTE is applied to the training fold only,
# then the randomized search is run on the resampled data).
pipline = imbalanced_make_pipeline(
    SMOTE(sampling_strategy="minority"),
    rand_log_reg,
)

for train_index, test_index in sss.split(Original_Xtrain, Original_ytrain):
    X_fold_train = Original_Xtrain[train_index]
    y_fold_train = Original_ytrain[train_index]
    X_fold_test = Original_Xtrain[test_index]
    y_fold_test = Original_ytrain[test_index]

    model = pipline.fit(X_fold_train, y_fold_train)
    # Fitting the pipeline fits rand_log_reg in place, so the best estimator
    # found on this fold can be read directly from the search object.
    best_est = rand_log_reg.best_estimator_
    prediction = best_est.predict(X_fold_test)
    # ROC AUC needs continuous scores; hard 0/1 labels collapse the ROC curve
    # to a single threshold and misreport the ranking quality.
    positive_scores = best_est.predict_proba(X_fold_test)[:, 1]

    accuracy_lst.append(pipline.score(X_fold_test, y_fold_test))
    precision_lst.append(precision_score(y_fold_test, prediction))
    recall_lst.append(recall_score(y_fold_test, prediction))
    f1_lst.append(f1_score(y_fold_test, prediction))
    auc_lst.append(roc_auc_score(y_fold_test, positive_scores))

# Report the mean of each metric across all folds.
print('*' * 45)
print(f'accuracy:{np.mean(accuracy_lst)}')
print(f'precision:{np.mean(precision_lst)}')
print(f'recall:{np.mean(recall_lst)}')
print(f'f1_score:{np.mean(f1_lst)}')
print(f'auc:{np.mean(auc_lst)}')
# Classification report on the held-out test set.
# `best_est` is the estimator bound by the cross-validation code above, i.e.
# the one selected on the last fold that was fitted.
labels = ["No Fraud", "Fraud"]
smote_prediction = best_est.predict(Original_Xtest)
print(
    classification_report(
        y_true=Original_ytest,
        y_pred=smote_prediction,
        target_names=labels,
    )
)