# 首先导入所需要的库
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
# 1、逻辑回归
# Logistic-regression evaluation: train/test metrics and ROC curves.
# NOTE(review): relies on `logreg`, `x_train`, `y_train`, `x_test`, `y_test`
# and `plt` being defined earlier in the file — not visible in this chunk.

# 训练集 — training-set metrics
Y_pred_lr = logreg.predict(x_train)
cnf_matrix = metrics.confusion_matrix(y_train, Y_pred_lr)  # kept for inspection
print('精准率:','%.3f' %metrics.precision_score(y_train,Y_pred_lr) ,
'召回率:','%.3f' %metrics.recall_score(y_train,Y_pred_lr) ,
'f1_score:','%.3f' %metrics.f1_score(y_train,Y_pred_lr),
'准确率:','%.3f' %metrics.accuracy_score(y_train,Y_pred_lr))

# 测试集 — test-set metrics
Y_pred_lr = logreg.predict(x_test)
cnf_matrix = metrics.confusion_matrix(y_test, Y_pred_lr)
print('精准率:','%.3f' %metrics.precision_score(y_test,Y_pred_lr) ,
'召回率:','%.3f' %metrics.recall_score(y_test,Y_pred_lr) ,
'f1_score:','%.3f' %metrics.f1_score(y_test,Y_pred_lr),
'准确率:','%.3f' %metrics.accuracy_score(y_test,Y_pred_lr))

# BUG FIX: the original plotted fpr1/tpr1/auc1 and fpr2/tpr2/auc2, which were
# never computed in this section (only train-set fpr/tpr/auc existed), so the
# plot raised NameError. Compute positive-class probabilities for both splits.
y_pred_prob1 = logreg.predict_proba(x_train)[:, 1]  # P(class=1), train
y_pred_prob2 = logreg.predict_proba(x_test)[:, 1]   # P(class=1), test
fpr1, tpr1, thresholds1 = metrics.roc_curve(y_train, y_pred_prob1)  # 计算ROC曲线
fpr2, tpr2, thresholds2 = metrics.roc_curve(y_test, y_pred_prob2)
auc1 = metrics.auc(fpr1, tpr1)
auc2 = metrics.auc(fpr2, tpr2)

# 绘制ROC曲线 — train vs. test ROC on one figure
plt.plot(fpr1,tpr1,lw=2,label='train ROC curve (area={:.2f})'.format(auc1))
plt.plot(fpr2,tpr2,lw=2,label='test ROC curve (area={:.2f})'.format(auc2))
plt.plot([0,1],[0,1],'r--')  # chance diagonal
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC curve')  # was 'train ROC' — misleading, plot shows both splits
plt.legend(loc='lower right')
plt.show()
# 2、SVM
# SVM evaluation: fit, train/test metrics, and ROC curves.
# probability=True is required so predict_proba is available for the ROC below.
# NOTE(review): relies on `x_train`, `y_train`, `x_test`, `y_test` and `plt`
# defined earlier in the file.
from sklearn.svm import SVC
svc = SVC(probability=True)
svc.fit(x_train, y_train)

# 训练集 — training-set metrics
# (removed dead `precision = metrics.accuracy_score(...)` store: it was never
# read, and the name was misleading — it held accuracy, not precision)
Y_pred_svc = svc.predict(x_train)
print('精准率:','%.3f' %metrics.precision_score(y_train,Y_pred_svc) ,
'召回率:','%.3f' %metrics.recall_score(y_train,Y_pred_svc) ,
'f1_score:','%.3f' %metrics.f1_score(y_train,Y_pred_svc),
'准确率:','%.3f' %metrics.accuracy_score(y_train,Y_pred_svc))

# 测试集 — test-set metrics
Y_pred_svc = svc.predict(x_test)
print('精准率:','%.3f' %metrics.precision_score(y_test,Y_pred_svc) ,
'召回率:','%.3f' %metrics.recall_score(y_test,Y_pred_svc) ,
'f1_score:','%.3f' %metrics.f1_score(y_test,Y_pred_svc),
'准确率:','%.3f' %metrics.accuracy_score(y_test,Y_pred_svc))

# ROC: column 1 of predict_proba is the positive-class probability
y_train_prob = svc.predict_proba(x_train)
y_test_prob = svc.predict_proba(x_test)
y_pred_prob1 = y_train_prob[:, 1]  # P(class=1), train
y_pred_prob2 = y_test_prob[:, 1]   # P(class=1), test
fpr1, tpr1, thresholds1 = metrics.roc_curve(y_train, y_pred_prob1)  # 计算ROC曲线
fpr2, tpr2, thresholds2 = metrics.roc_curve(y_test, y_pred_prob2)
auc1 = metrics.auc(fpr1, tpr1)
auc2 = metrics.auc(fpr2, tpr2)

# 绘制ROC曲线 — train vs. test ROC on one figure
plt.plot(fpr1,tpr1,lw=2,label='train ROC curve (area={:.2f})'.format(auc1))
plt.plot(fpr2,tpr2,lw=2,label='test ROC curve (area={:.2f})'.format(auc2))
plt.plot([0,1],[0,1],'r--')  # chance diagonal
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC curve')
plt.legend(loc='lower right')
plt.show()
# 3、决策树
# Decision-tree evaluation: train/test metrics and ROC curves.
# NOTE(review): relies on a fitted `decision_tree`, plus `x_train`, `y_train`,
# `x_test`, `y_test` and `plt` defined earlier in the file.

# 训练集 — training-set metrics
# (removed dead `precision = metrics.accuracy_score(...)` store: never read,
# and misleadingly named — it held accuracy, not precision)
Y_pred_decision_tree = decision_tree.predict(x_train)
print('精准率:','%.3f' %metrics.precision_score(y_train,Y_pred_decision_tree) ,
'召回率:','%.3f' %metrics.recall_score(y_train,Y_pred_decision_tree) ,
'f1_score:','%.3f' %metrics.f1_score(y_train,Y_pred_decision_tree),
'准确率:','%.3f' %metrics.accuracy_score(y_train,Y_pred_decision_tree))

# 测试集 — test-set metrics
Y_pred_decision_tree = decision_tree.predict(x_test)
print('精准率:','%.3f' %metrics.precision_score(y_test,Y_pred_decision_tree) ,
'召回率:','%.3f' %metrics.recall_score(y_test,Y_pred_decision_tree) ,
'f1_score:','%.3f' %metrics.f1_score(y_test,Y_pred_decision_tree),
'准确率:','%.3f' %metrics.accuracy_score(y_test,Y_pred_decision_tree))

# ROC: column 1 of predict_proba is the positive-class probability
y_train_prob = decision_tree.predict_proba(x_train)
y_test_prob = decision_tree.predict_proba(x_test)
y_pred_prob1 = y_train_prob[:, 1]  # P(class=1), train
y_pred_prob2 = y_test_prob[:, 1]   # P(class=1), test
fpr1, tpr1, thresholds1 = metrics.roc_curve(y_train, y_pred_prob1)  # 计算ROC曲线
fpr2, tpr2, thresholds2 = metrics.roc_curve(y_test, y_pred_prob2)
auc1 = metrics.auc(fpr1, tpr1)
auc2 = metrics.auc(fpr2, tpr2)

# 绘制ROC曲线 — train vs. test ROC on one figure
plt.plot(fpr1,tpr1,lw=2,label='train ROC curve (area={:.2f})'.format(auc1))
plt.plot(fpr2,tpr2,lw=2,label='test ROC curve (area={:.2f})'.format(auc2))
plt.plot([0,1],[0,1],'r--')  # chance diagonal
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC curve')
plt.legend(loc='lower right')
plt.show()
# 4、随机森林
# Random-forest evaluation: train/test metrics and ROC curves.
# NOTE(review): relies on a fitted `random_forest`, plus `x_train`, `y_train`,
# `x_test`, `y_test` and `plt` defined earlier in the file.

# 训练集 — training-set metrics
# (removed dead `precision = metrics.accuracy_score(...)` store: never read,
# and misleadingly named — it held accuracy, not precision)
Y_pred_random_forest = random_forest.predict(x_train)
print('精准率:','%.3f' %metrics.precision_score(y_train,Y_pred_random_forest) ,
'召回率:','%.3f' %metrics.recall_score(y_train,Y_pred_random_forest) ,
'f1_score:','%.3f' %metrics.f1_score(y_train,Y_pred_random_forest),
'准确率:','%.3f' %metrics.accuracy_score(y_train,Y_pred_random_forest))

# 测试集 — test-set metrics
Y_pred_random_forest = random_forest.predict(x_test)
print('精准率:','%.3f' %metrics.precision_score(y_test,Y_pred_random_forest) ,
'召回率:','%.3f' %metrics.recall_score(y_test,Y_pred_random_forest) ,
'f1_score:','%.3f' %metrics.f1_score(y_test,Y_pred_random_forest),
'准确率:','%.3f' %metrics.accuracy_score(y_test,Y_pred_random_forest))

# ROC: column 1 of predict_proba is the positive-class probability
y_train_prob = random_forest.predict_proba(x_train)
y_test_prob = random_forest.predict_proba(x_test)
y_pred_prob1 = y_train_prob[:, 1]  # P(class=1), train
y_pred_prob2 = y_test_prob[:, 1]   # P(class=1), test
fpr1, tpr1, thresholds1 = metrics.roc_curve(y_train, y_pred_prob1)  # 计算ROC曲线
fpr2, tpr2, thresholds2 = metrics.roc_curve(y_test, y_pred_prob2)
auc1 = metrics.auc(fpr1, tpr1)
auc2 = metrics.auc(fpr2, tpr2)

# 绘制ROC曲线 — train vs. test ROC on one figure
plt.plot(fpr1,tpr1,lw=2,label='train ROC curve (area={:.2f})'.format(auc1))
plt.plot(fpr2,tpr2,lw=2,label='test ROC curve (area={:.2f})'.format(auc2))
plt.plot([0,1],[0,1],'r--')  # chance diagonal
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC curve')
plt.legend(loc='lower right')
plt.show()
# （XGBoost 模型尚未成功运行，暂无其指标结果）
# 各模型指标输出结果小结：
# 从测试集来看，逻辑回归的综合表现较好。决策树和随机森林在训练集上各项指标都非常高，
# 但在测试集上明显下降，说明这两个模型可能存在过拟合。