Machine Learning -- algorithms performance measure (heart disease data)

最新推荐文章于 2024-04-10 12:07:03 发布

Opium_Z

最新推荐文章于 2024-04-10 12:07:03 发布

阅读量268

点赞数

分类专栏： Machine Learning

本文链接：https://blog.csdn.net/Opium_Z/article/details/89490946

版权

Machine Learning 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

OverView:

In this chapter, I’m gonna do performance stuffs for machine learning algorithms. From previous chapter, I have already built four model:
SVM-RBF, SVM-PLOY, Bagging, AdaBoost.

Step 1: Confusion Matrix

SVM-RBF (without normalization and with normalization)

在这里插入图片描述

SVM-POLY (without normalization and with normalization)

在这里插入图片描述

Bagging (without normalization and with normalization)

在这里插入图片描述

Adaboost (without normalization and with normalization)

在这里插入图片描述

Step 2: Metrics

Based on the confusion matrix, I will calculate:

F-measure score
accuracy
recall ( sensitivity, true positive rate )
precision ( positive predictive value )
AUC ( Area under the ROC curve )

–> This is a screen shot for all the metrics of each algorithm.

在这里插入图片描述

–> Put them into excel

在这里插入图片描述

–> Rank them to have an intuitive unstanding

在这里插入图片描述

Step 3: Plot ROC Curve

在这里插入图片描述



import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn import tree
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn import svm
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score, recall_score, precision_score, roc_auc_score, \
    roc_curve





## load data
from plot_confusion_matrix import plot_confusion_matrix

trainSet = pd.read_csv("clevelandtrain.csv")
testSet = pd.read_csv("clevelandtest.csv")

xtrain = (trainSet.drop(["heartdisease::category|0|1"], axis=1)).iloc[:,:].values  # (152, 13)
ytrain = trainSet["heartdisease::category|0|1"].iloc[:].values                     # (152,)

xtest = (testSet.drop(["heartdisease::category|0|1"], axis=1)).iloc[:,:].values    # (145, 13)
ytest = testSet["heartdisease::category|0|1"].iloc[:].values                       # (145,)



## data preprocessing

 # without: one-hot-encoder:

# xTrain = xtrain
# yTrain = ytrain
# xTest = xtest
# yTest = ytest

  # one-hot-encoder: #9 (cp), #19 (restecg),  #41 (slope), #51 (thal)

xtrain_pre = trainSet.drop(["cp", "restecg", "slope", "thal", "heartdisease::category|0|1"], axis=1).iloc[:,:].values # (152, 9)
xtrain_cp = trainSet["cp"].iloc[:].values
xtrain_restecg = trainSet["restecg"].iloc[:].values
xtrain_slope = trainSet["slope"].iloc[:].values
xtrain_thal = trainSet["thal"].iloc[:].values

ohe1 = OneHotEncoder(sparse = False,categories='auto',handle_unknown='ignore')
ohe2 = OneHotEncoder(sparse = False,categories='auto',handle_unknown='ignore')
ohe3 = OneHotEncoder(sparse = False,categories='auto',handle_unknown='ignore')
ohe4 = OneHotEncoder(sparse = False,categories='auto',handle_unknown='ignore')

xtrain_cp = ohe1.fit_transform(xtrain_cp.reshape(-1,1))                    # (152, 4)
xtrain_restecg = ohe2.fit_transform(xtrain_restecg.reshape(-1,1))          # (152, 3)
xtrain_slope = ohe3.fit_transform(xtrain_slope.reshape(-1,1))              # (152, 3)
xtrain_thal = ohe4.fit_transform(xtrain_thal.reshape(-1,1))                # (152, 3)


xTrain = np.hstack((xtrain_pre, xtrain_cp, xtrain_restecg, xtrain_slope, xtrain_thal))   # (152, 22)
yTrain = ytrain                                                                          # (152,)



xtest_pre = testSet.drop(["cp", "restecg", "slope", "thal", "heartdisease::category|0|1"], axis=1).iloc[:,:].values   # (145, 9)
xtest_cp = testSet["cp"].iloc[:].values
xtest_restecg = testSet["restecg"].iloc[:].values
xtest_slope = testSet["slope"].iloc[:].values
xtest_thal = testSet["thal"].iloc[:].values

xtest_cp = ohe1.transform(xtest_cp.reshape(-1,1))                 # (145, 4)
xtest_restecg = ohe2.transform(xtest_restecg.reshape(-1,1))       # (145, 3)
xtest_slope = ohe3.transform(xtest_slope.reshape(-1,1))           # (145, 3)
xtest_thal = ohe4.transform(xtest_thal.reshape(-1,1))             # (145, 3)

xTest = np.hstack((xtest_pre, xtest_cp, xtest_restecg, xtest_slope, xtest_thal))   # (145, 22)
yTest = ytest                                                                      # (145,)


print("-----------------------------------------------------------------")


class_names = np.array([1,0])


svcRBF = SVC(C=300.0,gamma=0.0001,kernel='rbf',probability=True)
svcRBF.fit(xTrain,yTrain)
svcRBFScore = svcRBF.score(xTest, yTest)
prediction_svcRBF = svcRBF.predict(xTest)



# # Plot non-normalized confusion matrix
# plot_confusion_matrix(yTest, prediction_svcRBF, classes=class_names, title='Confusion matrix svcRBF, without normalization')
#
# # Plot normalized confusion matrix
# plot_confusion_matrix(yTest, prediction_svcRBF, classes=class_names, normalize=True,
#                       title='Normalized confusion matrix svcRBF')
#
# plt.show()
#
#
# f1 = f1_score(yTest, prediction_svcRBF)
# acc1 = accuracy_score(yTest, prediction_svcRBF)
# rec1 = recall_score(yTest, prediction_svcRBF)
# pre1 = precision_score(yTest, prediction_svcRBF)
# auc1 = roc_auc_score(yTest, prediction_svcRBF)
# print(f1)
# print(acc1)
# print(rec1)
# print(pre1)
# print(auc1)
# print("-----------------------------------------------------------------")




svcPoly = SVC(C=1.0,degree = 8.666666,coef0=1.0,gamma = 'scale',max_iter=-1,kernel='poly',probability=True)
svcPoly.fit(xTrain,yTrain)
svcPolyScore = svcPoly.score(xTest, yTest)
prediction_svcPLOY = svcPoly.predict(xTest)

# # Plot non-normalized confusion matrix
# plot_confusion_matrix(yTest, prediction_svcPLOY, classes=class_names, title='Confusion matrix svcPLOY, without normalization')
#
# # Plot normalized confusion matrix
# plot_confusion_matrix(yTest, prediction_svcPLOY, classes=class_names, normalize=True,
#                       title='Normalized confusion matrix svcPLOY')
#
# plt.show()
#
# f2 = f1_score(yTest, prediction_svcPLOY)
# acc2 = accuracy_score(yTest, prediction_svcPLOY)
# rec2 = recall_score(yTest, prediction_svcPLOY)
# pre2 = precision_score(yTest, prediction_svcPLOY)
# auc2 = roc_auc_score(yTest, prediction_svcPLOY)
# print(f2)
# print(acc2)
# print(rec2)
# print(pre2)
# print(auc2)
# print("-----------------------------------------------------------------")



decisonTree = tree.DecisionTreeClassifier()
decisonTreeBagging = BaggingClassifier(decisonTree,max_samples=0.7, max_features=1.0)
decisonTreeBagging.fit(xTrain,yTrain)
Bagging_score = decisonTreeBagging.score(xTest,yTest)

prediction_Bagging = decisonTreeBagging.predict(xTest)



# # Plot non-normalized confusion matrix
# plot_confusion_matrix(yTest, prediction_Bagging, classes=class_names, title='Confusion matrix decisonTreeBagging, without normalization')
#
# # Plot normalized confusion matrix
# plot_confusion_matrix(yTest, prediction_Bagging, classes=class_names, normalize=True,
#                       title='Normalized confusion matrix decisonTreeBagging')
#
# plt.show()
#
# f3 = f1_score(yTest, prediction_Bagging)
# acc3 = accuracy_score(yTest, prediction_Bagging)
# rec3 = recall_score(yTest, prediction_Bagging)
# pre3 = precision_score(yTest, prediction_Bagging)
# auc3 = roc_auc_score(yTest, prediction_Bagging)
# print(f3)
# print(acc3)
# print(rec3)
# print(pre3)
# print(auc3)
# print("-----------------------------------------------------------------")



decisonTreeAda = AdaBoostClassifier(decisonTree,n_estimators=10,random_state=np.random.RandomState(1))
decisonTreeAda.fit(xTrain,yTrain)
AdaBoost_score = decisonTreeAda.score(xTest,yTest)

prediction_AdaBoost = decisonTreeAda.predict(xTest)


# # Plot non-normalized confusion matrix
# plot_confusion_matrix(yTest, prediction_AdaBoost, classes=class_names, title='Confusion matrix AdaBoost, without normalization')
#
# # Plot normalized confusion matrix
# plot_confusion_matrix(yTest, prediction_AdaBoost, classes=class_names, normalize=True,
#                       title='Normalized confusion matrix AdaBoost')
#
# plt.show()
#
#
# f4 = f1_score(yTest, prediction_AdaBoost)
# acc4 = accuracy_score(yTest, prediction_AdaBoost)
# rec4 = recall_score(yTest, prediction_AdaBoost)
# pre4 = precision_score(yTest, prediction_AdaBoost)
# auc4 = roc_auc_score(yTest, prediction_AdaBoost)
# print(f4)
# print(acc4)
# print(rec4)
# print(pre4)
# print(auc4)
# print("-----------------------------------------------------------------")





## plot roc curve

prediction_prob_svmRBF = svcRBF.predict_proba(xTest)[:, 1]
prediction_prob_svmPOLY = svcPoly.predict_proba(xTest)[:, 1]
prediction_prob_Bagging = decisonTreeBagging.predict_proba(xTest)[:, 1]
prediction_prob_Adaboost = decisonTreeAda.predict_proba(xTest)[:, 1]

fpr_svmRBF, tpr_svmRBF, _ = roc_curve(yTest, prediction_prob_svmRBF)
fpr_svmPOLY, tpr_svmPOLY, _ = roc_curve(yTest, prediction_prob_svmPOLY)
fpr_Bagging, tpr_Bagging, _ = roc_curve(yTest, prediction_prob_Bagging)
fpr_Adaboost, tpr_Adaboost, _ = roc_curve(yTest, prediction_prob_Adaboost)

plt.figure(1)
plt.plot([0, 1], [0, 1], 'k--')

plt.plot(fpr_svmRBF, tpr_svmRBF, label='SVM-RBF')
plt.plot(fpr_svmPOLY, tpr_svmPOLY, label='SVM-POLY')
plt.plot(fpr_Bagging, tpr_Bagging, label='Bagging')
plt.plot(fpr_Adaboost, tpr_Adaboost, label='Adaboost')

plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve')
plt.legend(loc='best')
plt.show()

Opium_Z

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
Machine Learning -- algorithms performance measure (heart disease data)

OverView:In this chapter, I’m gonna do performance stuffs for machine learning algorithms. From previous chapter, I have already built four model:SVM-RBF, SVM-PLOY, Bagging, AdaBoost.Step 1: Confus...
复制链接

扫一扫

专栏目录