1、Create a classification dataset (n_samples >= 1000, n_features >= 10)
代码如下:
from sklearn import datasets

# Synthetic two-class problem: 1000 samples with 10 features each.
x, y = datasets.make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=2,
)
print(x, y)
2、Split the dataset using 10-fold cross-validation
代码如下:
from sklearn import datasets
from sklearn import model_selection

# Synthetic two-class problem: 1000 samples with 10 features each.
x, y = datasets.make_classification(n_samples=1000, n_features=10, n_classes=2)
print(x, y)
print("------")

# sklearn.cross_validation was removed in scikit-learn 0.20. Its replacement,
# model_selection.KFold, takes the number of splits up front and yields the
# index arrays from split() instead of being iterated directly.
fold = model_selection.KFold(n_splits=10, shuffle=True)
for n1, n2 in fold.split(x):
    # n1 / n2 are the row indices of this fold's training / held-out parts.
    x1, y1 = x[n1], y[n1]
    x2, y2 = x[n2], y[n2]
    print(x1, y1, x2, y2)
3、Train the algorithms
GaussianNB
SVC
RandomForestClassifier
代码如下:
from sklearn import datasets
from sklearn import metrics
from sklearn import model_selection
from sklearn.naive_bayes import GaussianNB

# Synthetic two-class problem: 1000 samples with 10 features each.
x, y = datasets.make_classification(n_samples=1000, n_features=10, n_classes=2)
print(x, y)
print("------")

# sklearn.cross_validation was removed in scikit-learn 0.20. Its replacement,
# model_selection.KFold, takes the number of splits up front and yields the
# index arrays from split() instead of being iterated directly.
fold = model_selection.KFold(n_splits=10, shuffle=True)
for n1, n2 in fold.split(x):
    # n1 / n2 are the row indices of this fold's training / held-out parts.
    x1, y1 = x[n1], y[n1]
    x2, y2 = x[n2], y[n2]
    print(x1, y1, x2, y2)
    print("------")

    # Fit on the training part of the fold, score on the held-out part.
    a = GaussianNB()
    a.fit(x1, y1)
    pre = a.predict(x2)
    accuracy = metrics.accuracy_score(y2, pre)
    f11 = metrics.f1_score(y2, pre)
    # ROC AUC is a ranking metric, so it is computed from the positive-class
    # probability rather than from the thresholded 0/1 predictions.
    roc1 = metrics.roc_auc_score(y2, a.predict_proba(x2)[:, 1])
    print(pre, y2, accuracy, f11, roc1)
4、Evaluate the cross-validated performance
Accuracy
F1-score
AUC ROC
代码如下:
from sklearn import datasets
from sklearn import metrics
from sklearn import model_selection
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

# Synthetic two-class problem: 1000 samples with 10 features each.
x, y = datasets.make_classification(n_samples=1000, n_features=10, n_classes=2)
print(x, y)
print("------")

# sklearn.cross_validation was removed in scikit-learn 0.20. Its replacement,
# model_selection.KFold, takes the number of splits up front and yields the
# index arrays from split() instead of being iterated directly.
fold = model_selection.KFold(n_splits=10, shuffle=True)

# Model label -> list of (accuracy, f1, roc_auc) tuples, one entry per fold.
scores = {}

for n1, n2 in fold.split(x):
    # n1 / n2 are the row indices of this fold's training / held-out parts.
    x1, y1 = x[n1], y[n1]
    x2, y2 = x[n2], y[n2]
    print(x1, y1, x2, y2)
    print("------")

    # Gaussian naive Bayes: fit on the training part, score on the held-out part.
    a = GaussianNB()
    a.fit(x1, y1)
    pre = a.predict(x2)
    accuracy = metrics.accuracy_score(y2, pre)
    f11 = metrics.f1_score(y2, pre)
    # ROC AUC is a ranking metric, so it is computed from the positive-class
    # probability rather than from the thresholded 0/1 predictions.
    roc1 = metrics.roc_auc_score(y2, a.predict_proba(x2)[:, 1])
    print(pre, y2, accuracy, f11, roc1)
    scores.setdefault("GaussianNB", []).append((accuracy, f11, roc1))
    print("------")

    # RBF-kernel SVM, tried with several values of the regularisation strength C.
    for test in [1e-02, 1e-01, 1e00, 1e01, 1e02]:
        # C is passed by keyword: SVC's parameters are keyword-only in
        # current scikit-learn releases.
        clf = SVC(C=test, kernel='rbf', gamma=0.1)
        clf.fit(x1, y1)
        predict = clf.predict(x2)
        accuracy = metrics.accuracy_score(y2, predict)
        f12 = metrics.f1_score(y2, predict)
        # Same reasoning as above: rank by the signed distance to the
        # separating surface instead of by the hard labels.
        roc2 = metrics.roc_auc_score(y2, clf.decision_function(x2))
        print(predict, y2, accuracy, f12, roc2)
        scores.setdefault("SVC(C=%g)" % test, []).append((accuracy, f12, roc2))

# Cross-validated performance: mean accuracy, F1 and ROC AUC over the folds.
print("------")
for name, per_fold in scores.items():
    means = [sum(column) / len(column) for column in zip(*per_fold)]
    print(name, *means)