题目

Step 1:
sklearn.datasets.make_classification 函数能随机生成一个 n_classes 类(默认为 2 类)的分类数据集,返回一组样本(sample)X 和与之对应的一组标记(label)y。
from sklearn import datasets
from sklearn import model_selection
from sklearn import naive_bayes
from sklearn import svm
from sklearn import ensemble
from sklearn import metrics
# Size of the synthetic dataset: number of samples and features per sample.
n_samples, n_features = 1000, 10
# Draw a random labelled classification dataset (samples X, labels y).
X, y = datasets.make_classification(
    n_samples=n_samples,
    n_features=n_features,
)
Step 2:
使用 sklearn.model_selection.KFold 类的 split 方法,它会依次给出每一折的训练集下标和测试集下标。
# Number of cross-validation folds.
n_splits = 10
# shuffle=True shuffles the data before it is split into folds.
kf = model_selection.KFold(n_splits=n_splits, shuffle=True)
for train_index, test_index in kf.split(X):
    # split() yields index arrays; use them to slice out this fold's
    # training and test portions of both the samples and the labels.
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
Step 3:
分别使用 GaussianNB 类、SVC 类、RandomForestClassifier 类的 fit 方法和 predict 方法。
# The three classifiers under comparison, in the order they are evaluated.
algorithms = [
    naive_bayes.GaussianNB(),
    svm.SVC(C=1e-1, kernel='rbf'),
    ensemble.RandomForestClassifier(n_estimators=10),
]
for algorithm in algorithms:
    # Train on the training portion, then predict labels for the test portion.
    algorithm.fit(X_train, y_train)
    pred = algorithm.predict(X_test)
Step 4:
同样是直接调用 sklearn.metrics 中的 accuracy_score、f1_score 和 roc_auc_score 函数。
# Score the predictions against the true test labels with three metrics.
acc = metrics.accuracy_score(y_true=y_test, y_pred=pred)
f1 = metrics.f1_score(y_true=y_test, y_pred=pred)
# NOTE(review): pred holds the output of predict() (class labels), not
# continuous scores, so the AUC is computed from labels -- confirm this
# is what the exercise intends.
auc = metrics.roc_auc_score(y_true=y_test, y_score=pred)
完整代码
from sklearn import datasets
from sklearn import model_selection
from sklearn import naive_bayes
from sklearn import svm
from sklearn import ensemble
from sklearn import metrics
# Generate a random classification dataset: 1000 samples, 10 features each.
n_samples = 1000
n_features = 10
X, y = datasets.make_classification(n_samples=n_samples, n_features=n_features)

# The three classifiers under comparison. Results for each fold are printed
# in this order: GaussianNB, SVC, RandomForestClassifier.
algorithms = [
    naive_bayes.GaussianNB(),
    svm.SVC(C=1e-1, kernel='rbf'),
    ensemble.RandomForestClassifier(n_estimators=10),
]

# 10-fold cross-validation; shuffle=True shuffles the data before splitting.
n_splits = 10
kf = model_selection.KFold(n_splits=n_splits, shuffle=True)
for case_no, (train_index, test_index) in enumerate(kf.split(X)):
    # Slice out this fold's training and test portions.
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    print('Case #' + str(case_no) + ':')
    for algorithm in algorithms:
        # Train on the training portion and predict the test portion.
        algorithm.fit(X_train, y_train)
        pred = algorithm.predict(X_test)
        acc = metrics.accuracy_score(y_test, pred)
        f1 = metrics.f1_score(y_test, pred)
        # NOTE(review): pred holds class labels from predict(), not
        # continuous scores, so the AUC is computed from labels --
        # confirm this is what the exercise intends.
        auc = metrics.roc_auc_score(y_test, pred)
        print('acc:', acc)
        print('f1:', f1)
        print('auc:', auc)
        # Blank line separates one classifier's results from the next.
        print()