from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import auc,roc_curve
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn import metrics
# Candidate classifiers as (display name, estimator) pairs. The evaluation
# loop fits each estimator and calls predict_proba on it, so every entry must
# expose predict_proba.
models = [
    ('RF', RandomForestClassifier(max_depth=3, random_state=0)),
    ('DT', DecisionTreeClassifier(random_state=0)),
    ('LR', LogisticRegression()),
    ('GaussianNB', GaussianNB()),
    # probability=True enables predict_proba for SVC; the probability
    # estimates rely on shuffled internal cross-validation, so the seed is
    # pinned to keep runs reproducible, matching the RF and DT entries.
    ('SVC', SVC(probability=True, random_state=0)),
    ('KNN', KNeighborsClassifier()),
]
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    if denominator == 0:
        return float("nan")
    return numerator / float(denominator)


def calculate_metric(label, y_prob, optimal_threshold):
    """Compute threshold-based metrics for a binary classifier.

    Each score in ``y_prob`` that is greater than or equal to
    ``optimal_threshold`` is predicted as class 1, every other score as
    class 0. The predictions are tallied against ``label`` into the four
    confusion-matrix cells (TP, TN, FP, FN) and the metrics are derived
    from those counts.

    Args:
        label: Iterable of true class labels. Entries equal to 1 are
            treated as positive, all other entries as negative.
        y_prob: Iterable of positive-class scores, aligned with ``label``.
        optimal_threshold: Cut-off applied to ``y_prob`` (inclusive).

    Returns:
        Tuple ``(Accuracy, Sensitivity, Specificity, Recall, Precision,
        Gmean)``. Any ratio whose denominator is zero (for example
        Sensitivity when ``label`` contains no positives) is NaN, and
        Gmean is NaN whenever one of its factors is NaN.

    Raises:
        ValueError: If ``label`` and ``y_prob`` have different lengths.
    """
    truths = list(label)
    scores = list(y_prob)
    if len(truths) != len(scores):
        raise ValueError(
            "label and y_prob must have the same length: "
            "%d != %d" % (len(truths), len(scores))
        )

    # Count the cells directly so that inputs containing a single class do
    # not break the computation (a 1x1 matrix cannot be indexed at [1, 1]).
    tp = tn = fp = fn = 0
    for truth, score in zip(truths, scores):
        predicted_positive = score >= optimal_threshold
        if truth == 1:
            if predicted_positive:
                tp += 1
            else:
                fn += 1
        elif predicted_positive:
            fp += 1
        else:
            tn += 1

    accuracy = _safe_ratio(tp + tn, tp + tn + fp + fn)
    sensitivity = _safe_ratio(tp, tp + fn)
    specificity = _safe_ratio(tn, tn + fp)
    # Recall is the true-positive rate TP / (TP + FN); dividing by TP + TN
    # would mix in the negatives and also skew Gmean below.
    recall = _safe_ratio(tp, tp + fn)
    precision = _safe_ratio(tp, tp + fp)
    gmean = (recall * specificity) ** 0.5
    return accuracy, sensitivity, specificity, recall, precision, gmean
# Fit every candidate model, score it on the test split and collect:
#   results - one row of scalar metrics per model (turned into df_result)
#   roc_    - [fpr, tpr, roc_auc, name] per model
#   pr_     - [precision, recall, average precision, name] per model
# NOTE(review): X_train, y_train, X_test, y_test, ROC and pd are not defined
# in the visible part of this file; they must be provided before this point.
results = []
roc_ = []
pr_ = []
for name, model in models:
    clf = model.fit(X_train, y_train)
    pred_proba = clf.predict_proba(X_test)
    y_pred_class = clf.predict(X_test)
    # Column 1 of predict_proba holds the score of the second class.
    y_prob = pred_proba[:, 1]

    # ROC is expected to return the curve, its AUC and the chosen cut-off.
    fpr, tpr, roc_auc, optimal_threshold, optimal_point = ROC(y_test, y_prob)
    Accuracy, Sensitivity, Specificity, Recall, Precision, Gmean = calculate_metric(
        y_test, y_prob, optimal_threshold
    )
    # The column is labelled F1, so compute the F1 score rather than recall.
    # It is based on clf.predict, i.e. the estimator's own decision rule, not
    # on optimal_threshold like the metrics above.
    F1 = metrics.f1_score(y_test, y_pred_class)
    # KS statistic: largest gap between the TPR and FPR curves.
    ks_value = max(abs(fpr - tpr))

    results.append([
        optimal_threshold, Accuracy, Sensitivity, Specificity, Recall,
        F1, ks_value, Gmean, roc_auc, name,
    ])
    roc_.append([fpr, tpr, roc_auc, name])

    pre, rec, _ = metrics.precision_recall_curve(y_test, y_prob)
    # Call through the imported metrics module; the bare name
    # average_precision_score is not imported in the visible file.
    AP = metrics.average_precision_score(y_test, y_prob)
    pr_.append([pre, rec, AP, name])

# Passing the column names at construction also works when no model was run.
df_result = pd.DataFrame(
    results,
    columns=[
        "Optimal_threshold", "Accuracy", "Sensitivity", "Specificity",
        'Recall', 'F1', 'ks_value', 'Gmean', "AUC_ROC", "Model_name",
    ],
)
# Bare expression: displays the table when run as a notebook cell.
df_result
# Computing confusion-matrix metrics in Python for models trained with several machine-learning methods
# First published 2024-01-07 12:26:30