题目:
代码展示:
from sklearn import datasets, metrics, model_selection
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

# Compare GaussianNB, SVC and RandomForestClassifier on a synthetic binary
# classification problem using 10-fold cross-validation.  For every model the
# accuracy, F1 and ROC AUC are computed on each held-out fold and the mean
# over all folds is printed.
#
# NOTE: sklearn.cross_validation was removed in scikit-learn 0.20; its
# replacement, sklearn.model_selection (available since 0.18), is used here.


def _positive_class_scores(model, features):
    """Return a continuous class-1 score for each row of *features*.

    ROC AUC ranks the samples, so thresholded ``predict`` output is not
    used: the ``decision_function`` value is taken when the model has one
    (SVC), otherwise the second column of ``predict_proba`` (GaussianNB,
    RandomForestClassifier).
    """
    if hasattr(model, "decision_function"):
        return model.decision_function(features)
    return model.predict_proba(features)[:, 1]


def _cross_validate_and_report(title, model, X, y, splits):
    """Fit *model* on every training fold and print its mean test scores.

    title  -- heading printed before the scores
    model  -- scikit-learn classifier; it is re-fitted on each fold
    X, y   -- full feature matrix and label vector
    splits -- list of (train_index, test_index) pairs shared by all models
    """
    acc_total = f1_total = auc_total = 0.0
    for train_index, test_index in splits:
        X_train, y_train = X[train_index], y[train_index]
        X_test, y_test = X[test_index], y[test_index]
        model.fit(X_train, y_train)
        pred = model.predict(X_test)
        acc_total += metrics.accuracy_score(y_test, pred)
        f1_total += metrics.f1_score(y_test, pred)
        auc_total += metrics.roc_auc_score(
            y_test, _positive_class_scores(model, X_test))

    n_folds = len(splits)
    print(title)
    print('ACC score:', acc_total / n_folds)
    print('f1 score:', f1_total / n_folds)
    print('ROC AUC score:', auc_total / n_folds)
    print('\n')


dataset = datasets.make_classification(
    n_samples=1500, n_features=10, n_informative=2, n_redundant=2,
    n_classes=2)
X, y = dataset

kf = model_selection.KFold(n_splits=10, shuffle=True)
# Generate the folds once so that every model is scored on the same
# train/test partitions.
splits = list(kf.split(X))

_cross_validate_and_report("GaussianNB:", GaussianNB(), X, y, splits)

for C in [1e-02, 1e-01, 1e00, 1e01, 1e02]:
    # C is passed by keyword: SVC parameters are keyword-only in current
    # scikit-learn releases.
    _cross_validate_and_report("SVM for C=" + str(C), SVC(C=C), X, y, splits)

for n_estimators in [10, 100, 1000]:
    _cross_validate_and_report(
        "RandomForest for n_estimators=" + str(n_estimators),
        RandomForestClassifier(n_estimators=n_estimators),
        X, y, splits)
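结果(某一次运行的输出;数据集为随机生成,每次运行的数值会有所不同。输出中的 “ACU score” 指准确率(accuracy),“ROC ACU score” 指 ROC AUC;标为 “SVM for n_estimators=…” 的三组实际是 RandomForestClassifier 的结果):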
GaussianNB:
ACU score: 0.9333333333333333
f1 score: 0.9367088607594937
ROC ACU score: 0.9326923076923076
SVM for C=0.01
ACU score: 0.9333333333333333
f1 score: 0.9390243902439025
ROC ACU score: 0.9310897435897436
SVM for C=0.1
ACU score: 0.9466666666666667
f1 score: 0.9506172839506173
ROC ACU score: 0.9449786324786326
SVM for C=1.0
ACU score: 0.9533333333333334
f1 score: 0.9565217391304347
ROC ACU score: 0.9519230769230769
SVM for C=10.0
ACU score: 0.94
f1 score: 0.9447852760736197
ROC ACU score: 0.9380341880341879
SVM for C=100.0
ACU score: 0.9266666666666666
f1 score: 0.9325153374233129
ROC ACU score: 0.9246794871794872
SVM for n_estimators=10
ACU score: 0.9466666666666667
f1 score: 0.9493670886075949
ROC ACU score: 0.9460470085470086
SVM for n_estimators=100
ACU score: 0.9466666666666667
f1 score: 0.9500000000000001
ROC ACU score: 0.9455128205128205
SVM for n_estimators=1000
ACU score: 0.9466666666666667
f1 score: 0.9500000000000001
ROC ACU score: 0.9455128205128205