[Advanced Programming Technology] [Homework] [Week 15] [1]

An sklearn exercise

from sklearn import datasets
from sklearn.model_selection import KFold
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import sklearn.metrics as metrics

def evaluate(y_test, pred):
    acc = metrics.accuracy_score(y_test, pred)
    # f1 = metrics.f1_score(y_test, pred, average='micro')
    # auc = metrics.roc_auc_score(y_test, pred)
    # print('acc =', acc)
    # print('f1 =', f1)
    # print('auc =', auc)
    return acc

print('Naive Bayes:')
scores = []
# synthetic 4-class dataset, evaluated with shuffled 10-fold cross-validation
X, y = datasets.make_classification(n_samples=1000, n_features=10, n_informative=8, n_redundant=2, n_repeated=0, n_classes=4)
kf = KFold(n_splits=10, shuffle=True)
for train_index, test_index in kf.split(X):
    x_train, y_train = X[train_index], y[train_index]
    x_test, y_test = X[test_index], y[test_index]
    clf = GaussianNB()
    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)
    scores.append(evaluate(y_test, pred))
print(sum(scores)/len(scores))
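
As an aside, scikit-learn can do the same 10-fold evaluation in one call with cross_val_score; a minimal sketch, reusing X and y from above:

from sklearn.model_selection import cross_val_score

# 10-fold cross-validated accuracy for Gaussian Naive Bayes
nb_scores = cross_val_score(GaussianNB(), X, y, cv=10, scoring='accuracy')
print(nb_scores.mean())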

print('SVC:')
scores = []
params = [1e-2, 1e-1, 1e0, 1e1, 1e2]
for train_index, test_index in kf.split(X):
    x_train, y_train = X[train_index], y[train_index]
    x_test, y_test = X[test_index], y[test_index]
    for c in params:
        clf = SVC(C=c, kernel='rbf', gamma=0.1)
        clf.fit(x_train, y_train)
        pred = clf.predict(x_test)
        scores.append(evaluate(y_test, pred))
print(sum(scores)/len(scores))
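
Note that this loop pools the scores of all five C values into one average instead of picking the best C. If the goal is tuning, GridSearchCV performs the cross-validated selection directly; a rough sketch on the same data (X, y as above):

from sklearn.model_selection import GridSearchCV

# cross-validated search over C for an RBF-kernel SVC
grid = GridSearchCV(SVC(kernel='rbf', gamma=0.1),
                    param_grid={'C': [1e-2, 1e-1, 1e0, 1e1, 1e2]},
                    cv=10, scoring='accuracy')
grid.fit(X, y)
print(grid.best_params_, grid.best_score_)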

print('Random Forest:')
scores = []
params = [10, 100, 1000]
for train_index, test_index in kf.split(X):
    x_train, y_train = X[train_index], y[train_index]
    x_test, y_test = X[test_index], y[test_index]
    for estimate in params:
        clf = RandomForestClassifier(n_estimators=estimate)
        clf.fit(x_train, y_train)
        pred = clf.predict(x_test)
        scores.append(evaluate(y_test, pred))
print(sum(scores)/len(scores))
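
The same pooling happens here: accuracies for n_estimators = 10, 100 and 1000 are averaged together. Keeping one list per setting makes the comparison explicit; a small sketch reusing kf, params and evaluate from above:

from collections import defaultdict

# average accuracy per n_estimators setting over the 10 folds
per_setting = defaultdict(list)
for train_index, test_index in kf.split(X):
    for estimate in params:
        clf = RandomForestClassifier(n_estimators=estimate)
        clf.fit(X[train_index], y[train_index])
        per_setting[estimate].append(evaluate(y[test_index], clf.predict(X[test_index])))
for estimate, accs in sorted(per_setting.items()):
    print(estimate, sum(accs) / len(accs))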

Results:

Naive Bayes:
0.654
SVC:
0.6838
Random Forest:
0.7853333333333332

Random Forest took the longest to run and achieved the highest accuracy, while Naive Bayes ran the fastest and scored the lowest.
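
The running-time comparison above is from observation; to actually measure it, one rough sketch is to time a single fit/predict cycle per classifier with time.perf_counter (reusing x_train, y_train, x_test from the last fold; absolute numbers will vary by machine):

import time

for name, clf in [('Naive Bayes', GaussianNB()),
                  ('SVC', SVC(C=1.0, kernel='rbf', gamma=0.1)),
                  ('Random Forest', RandomForestClassifier(n_estimators=100))]:
    start = time.perf_counter()
    clf.fit(x_train, y_train)
    clf.predict(x_test)
    print(name, time.perf_counter() - start, 'seconds')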

Note: I am not sure why f1_score and roc_auc_score fail to run.
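
The roc_auc_score call is the likely problem: on a 4-class task it cannot be computed from hard label predictions, it needs per-class probability estimates (and, in recent scikit-learn versions, a multi_class argument), while f1_score with average='micro' should work as written. A minimal sketch on a single fold, assuming clf, x_test and y_test from the loops above and a classifier that provides predict_proba (GaussianNB and RandomForestClassifier do; SVC needs probability=True):

# micro-averaged F1 over the 4 classes
f1 = metrics.f1_score(y_test, clf.predict(x_test), average='micro')

# multiclass ROC AUC needs probability estimates, not predicted labels
proba = clf.predict_proba(x_test)          # shape (n_samples, 4)
auc = metrics.roc_auc_score(y_test, proba, multi_class='ovr')
print('f1 =', f1, 'auc =', auc)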
