sklearn习题

Steps

1 Create a classification dataset (n_samples ≥ 1000, n_features ≥ 10)
2 Split the dataset using 10-fold cross validation
3 Train the algorithms
   - GaussianNB
   - SVC (possible C values [1e-02, 1e-01, 1e00, 1e01, 1e02], RBF kernel)
   - RandomForestClassifier (possible n_estimators values [10, 100, 1000])
4 Evaluate the cross-validated performance
   - Accuracy
   - F1-score
   - AUC ROC
5 Write a short report summarizing the methodology and the results

from sklearn import datasets
from sklearn import cross_validation
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
import numpy as np

def rbf_svm(x_train, y_train, x_test, c, gamma=0.1):
    """Fit an RBF-kernel SVC and return its predicted labels for ``x_test``.

    Parameters
    ----------
    x_train, y_train : training samples and their labels.
    x_test : samples to predict.
    c : value passed to ``SVC`` as the penalty parameter ``C``.
    gamma : RBF kernel coefficient passed to ``SVC``. Defaults to 0.1,
        the value this function previously hard-coded, so existing
        four-argument calls behave exactly as before.

    Returns
    -------
    The result of ``SVC.predict(x_test)`` (hard class labels).
    """
    clf = SVC(C=c, kernel='rbf', gamma=gamma)
    clf.fit(x_train, y_train)
    return clf.predict(x_test)

def rbf_random(x_train,y_train,x_test,n):
    """Fit a random forest of ``n`` trees and predict labels for ``x_test``.

    Despite the ``rbf_`` prefix, no kernel is involved: the function only
    builds a ``RandomForestClassifier(n_estimators=n)``, fits it on the
    training data and returns ``predict(x_test)``. No ``random_state`` is
    passed to the forest.
    """
    forest = RandomForestClassifier(n_estimators=n)
    # fit() hands back the fitted estimator, so the calls can be chained.
    return forest.fit(x_train, y_train).predict(x_test)

def svm(x_train, y_train, x_test, y_test):
    """Pick the best SVC ``C`` by inner cross-validation, then predict.

    Each candidate ``C`` is scored by its mean F1 over a 5-fold split of
    the training data (shuffled with a fixed seed). The candidate with the
    highest mean F1 is refit on all of ``x_train`` via ``rbf_svm`` and used
    to predict ``x_test``. Ties go to the earliest (smallest) candidate,
    because ``np.argmax`` returns the first maximum.

    Parameters
    ----------
    x_train, y_train : outer-fold training samples and labels; indexed
        with the integer index arrays produced by ``KFold``.
    x_test : outer-fold samples to predict.
    y_test : unused; kept so existing callers keep working.

    Returns
    -------
    Predicted labels for ``x_test`` from the SVC trained with the best C.
    """
    c_values = [1e-2, 1e-1, 1e0, 1e1, 1e2]

    # The split is seeded, so it is identical for every candidate; build
    # and materialise it once instead of once per C value.
    inner_folds = list(cross_validation.KFold(
        len(x_train), n_folds=5, shuffle=True, random_state=5678))

    mean_f1_per_c = []
    for c in c_values:
        fold_f1 = []
        for t_index, v_index in inner_folds:
            x_t, x_v = x_train[t_index], x_train[v_index]
            y_t, y_v = y_train[t_index], y_train[v_index]
            fold_pred = rbf_svm(x_t, y_t, x_v, c)
            fold_f1.append(metrics.f1_score(y_v, fold_pred))
        mean_f1_per_c.append(sum(fold_f1) / len(fold_f1))

    best_c = c_values[np.argmax(mean_f1_per_c)]
    return rbf_svm(x_train, y_train, x_test, best_c)

def ran(x_train, y_train, x_test, y_test):
    """Pick the best forest size by inner cross-validation, then predict.

    Each candidate ``n_estimators`` value is scored by its mean F1 over a
    3-fold split of the training data (shuffled with a fixed seed). The
    candidate with the highest mean F1 is refit on all of ``x_train`` via
    ``rbf_random`` and used to predict ``x_test``. Ties go to the earliest
    (smallest) candidate, because ``np.argmax`` returns the first maximum.

    Parameters
    ----------
    x_train, y_train : outer-fold training samples and labels; indexed
        with the integer index arrays produced by ``KFold``.
    x_test : outer-fold samples to predict.
    y_test : unused; kept so existing callers keep working.

    Returns
    -------
    Predicted labels for ``x_test`` from the forest with the best size.
    """
    n_values = [10, 100, 1000]

    # The split is seeded, so it is identical for every candidate; build
    # and materialise it once instead of once per forest size.
    inner_folds = list(cross_validation.KFold(
        len(x_train), n_folds=3, shuffle=True, random_state=5678))

    mean_f1_per_n = []
    for n in n_values:
        fold_f1 = []
        for t_index, v_index in inner_folds:
            x_t, x_v = x_train[t_index], x_train[v_index]
            y_t, y_v = y_train[t_index], y_train[v_index]
            fold_pred = rbf_random(x_t, y_t, x_v, n)
            fold_f1.append(metrics.f1_score(y_v, fold_pred))
        mean_f1_per_n.append(sum(fold_f1) / len(fold_f1))

    best_n = n_values[np.argmax(mean_f1_per_n)]
    return rbf_random(x_train, y_train, x_test, best_n)


# Synthetic binary classification problem: 1000 samples, 10 features, of
# which 2 are informative and 2 redundant.
# NOTE(review): neither the dataset nor the outer split is seeded, so the
# reported numbers differ from run to run.
X, y = datasets.make_classification(n_samples=1000, n_features=10,
    n_informative=2, n_redundant=2, n_repeated=0, n_classes=2)
kf = cross_validation.KFold(len(y), n_folds=10, shuffle=True)


def _gaussian_nb(x_train, y_train, x_test, y_test):
    """Fit GaussianNB on the training fold and predict ``x_test``.

    ``y_test`` is unused; it is accepted so this function has the same
    signature as ``svm`` and ``ran``.
    """
    clf = GaussianNB()
    clf.fit(x_train, y_train)
    return clf.predict(x_test)


# (printed heading, predictor) in evaluation and report order. The headings
# are runtime output and are kept exactly as they were.
MODELS = [
    ('GuassianNB:', _gaussian_nb),
    ('SVC:', svm),
    ('RandomForestClassifier:', ran),
]

# (printed metric label, scoring function) in evaluation and report order.
# NOTE(review): roc_auc_score is fed hard class predictions here, not
# decision scores or probabilities, so "auc_roc" reflects a single
# operating point rather than a full ROC curve.
METRICS = [
    ('accuracy', metrics.accuracy_score),
    ('f1', metrics.f1_score),
    ('auc_roc', metrics.roc_auc_score),
]

# scores[heading][metric label] -> list with one value per outer fold.
scores = {}
for heading, _ in MODELS:
    scores[heading] = dict((metric_name, []) for metric_name, _ in METRICS)

for train_index, test_index in kf:
    x_train, y_train = X[train_index], y[train_index]
    x_test, y_test = X[test_index], y[test_index]

    for heading, predict in MODELS:
        pred = predict(x_train, y_train, x_test, y_test)
        for metric_name, metric_fn in METRICS:
            scores[heading][metric_name].append(metric_fn(y_test, pred))

for heading, _ in MODELS:
    print(heading)
    # Regroup the per-metric lists into one tuple of values per fold.
    per_fold = zip(*[scores[heading][metric_name]
                     for metric_name, _ in METRICS])
    for i, fold_values in enumerate(per_fold):
        print('for test' + str(i) + ':')
        for (metric_name, _), value in zip(METRICS, fold_values):
            print(metric_name + ':' + str(value))
        print('---------------------------')

这里写图片描述

这里写图片描述

这里写图片描述

这里写图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值