代码:
from sklearn import datasets
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

try:
    # Current home of KFold (scikit-learn >= 0.18).
    from sklearn import model_selection
except ImportError:
    model_selection = None

try:
    # Legacy module, removed in scikit-learn 0.20; kept for old installs.
    from sklearn import cross_validation
except ImportError:
    cross_validation = None
# Compare GaussianNB, SVC (RBF kernel) and RandomForestClassifier on a
# synthetic binary classification problem with shuffled k-fold
# cross-validation, then report each metric averaged over all folds.
N_FOLDS = 10

X, Y = datasets.make_classification(n_samples=1000, n_features=10)

# Materialise the (train_index, test_index) pairs once. Prefer the current
# splitter API and only fall back to the legacy module when it is missing.
if model_selection is not None:
    folds = list(
        model_selection.KFold(n_splits=N_FOLDS, shuffle=True).split(X)
    )
else:
    folds = list(
        cross_validation.KFold(len(X), n_folds=N_FOLDS, shuffle=True)
    )

# (display name, factory) pairs; a fresh estimator is built for every fold.
classifiers = [
    ("GaussianNB", lambda: GaussianNB()),
    ("SVC", lambda: SVC(C=1e00, kernel='rbf', gamma=0.1)),
    ("RandomForestClassifier",
     lambda: RandomForestClassifier(n_estimators=100)),
]


def _positive_class_scores(clf, samples):
    """Return continuous positive-class scores for ROC AUC.

    ROC AUC needs a ranking of the samples, not thresholded labels, so use
    the decision function when the estimator has one and otherwise the
    predicted probability of the second class (label 1 here).
    """
    if hasattr(clf, "decision_function"):
        return clf.decision_function(samples)
    return clf.predict_proba(samples)[:, 1]


def _mean(values):
    """Return the arithmetic mean of a non-empty list of numbers."""
    return sum(values) / len(values)


# Per-classifier lists of per-fold scores.
fold_scores = {
    name: {"acc": [], "f1": [], "auc": []} for name, _ in classifiers
}

for train_index, test_index in folds:
    X_train, y_train = X[train_index], Y[train_index]
    X_test, y_test = X[test_index], Y[test_index]
    for name, make_clf in classifiers:
        clf = make_clf()
        clf.fit(X_train, y_train)
        pred = clf.predict(X_test)
        result = fold_scores[name]
        result["acc"].append(metrics.accuracy_score(y_test, pred))
        result["f1"].append(metrics.f1_score(y_test, pred))
        # AUC is computed from scores, not from the hard labels in `pred`.
        result["auc"].append(
            metrics.roc_auc_score(
                y_test, _positive_class_scores(clf, X_test)
            )
        )

# Every value printed below is the mean over all folds.
print("The performance analysis:")
print()
for name, _ in classifiers:
    result = fold_scores[name]
    print(name + ":")
    print("Accuracy of " + name + ":", _mean(result["acc"]))
    print("F1-score of " + name, _mean(result["f1"]))
    print("AUC ROC of " + name + ":", _mean(result["auc"]))
    print()
运行结果:
可以看出,RandomForestClassifier 算法的性能要好于 GaussianNB 和 SVC 算法,而 GaussianNB 和 SVC 算法的性能差不多。