sklearn 练习

sklearn


代码

from sklearn import datasets
from sklearn import cross_validation
from sklearn.naive_bayes import GaussianNB 
from sklearn import metrics
from sklearn.svm import SVC 
from sklearn.ensemble import RandomForestClassifier    


######################################################
#  cross_validation
######################################################
# Generate a synthetic classification data set: 1000 samples, 10 features.
# make_classification returns a (features, labels) pair.
dataset = datasets.make_classification(n_samples=1000, n_features=10)
X, Y = dataset

# 10-fold split with shuffling.  The sample count is read from X itself so
# the folds stay correct if n_samples above is ever changed.
# NOTE: sklearn.cross_validation was deprecated in scikit-learn 0.18 and
# removed in 0.20; on newer versions the equivalent is
# sklearn.model_selection.KFold(n_splits=10, shuffle=True).split(X).
kf = cross_validation.KFold(len(X), n_folds=10, shuffle=True)

# Walk through every fold once.  Each pass overwrites the previous split, so
# after the loop these names hold the train/test partition of the LAST fold.
for train_index, test_index in kf:
    x_train, y_train = X[train_index], Y[train_index]
    x_test, y_test = X[test_index], Y[test_index]

# Print the last fold's partition as a quick look at what a split contains.
print(x_test)
print(x_train)
print(y_test)
print(y_train)
######################################################
#   GaussianNB
######################################################
# Train and score a Gaussian naive Bayes classifier on every fold of kf,
# printing the predictions, the true labels and one summary line per fold.
for train_index, test_index in kf:
    x_train, y_train = X[train_index], Y[train_index]
    x_test, y_test = X[test_index], Y[test_index]

    clf = GaussianNB()
    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)
    print(pred)
    print(y_test)

    # Accuracy and F1 are defined on the hard (thresholded) predictions.
    Accuracy = metrics.accuracy_score(y_test, pred)
    F1_score = metrics.f1_score(y_test, pred)
    # ROC AUC needs a continuous score so the threshold can be swept; feeding
    # it the 0/1 predictions only evaluates a single operating point.
    # Column 1 of predict_proba is the probability of clf.classes_[1].
    AUC_ROC = metrics.roc_auc_score(y_test, clf.predict_proba(x_test)[:, 1])
    print("GaussianNB", Accuracy, F1_score, AUC_ROC)

######################################################
#   svc
######################################################
# Train and score an RBF-kernel SVC (C=0.1, gamma=0.1) on every fold of kf,
# printing the predictions, the true labels and one summary line per fold.
for train_index, test_index in kf:
    x_train, y_train = X[train_index], Y[train_index]
    x_test, y_test = X[test_index], Y[test_index]

    clf = SVC(C=1e-01, kernel='rbf', gamma=0.1)
    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)
    print(pred)
    print(y_test)

    # Accuracy and F1 are defined on the hard (thresholded) predictions.
    Accuracy = metrics.accuracy_score(y_test, pred)
    F1_score = metrics.f1_score(y_test, pred)
    # ROC AUC needs a continuous score so the threshold can be swept; feeding
    # it the 0/1 predictions only evaluates a single operating point.
    # The signed distance to the separating surface serves as that score;
    # ravel() flattens it in case a column vector is returned.
    AUC_ROC = metrics.roc_auc_score(
        y_test, clf.decision_function(x_test).ravel())
    print("svc", Accuracy, F1_score, AUC_ROC)
######################################################
#    RandomForestClassifier 
######################################################

# Train and score a 6-tree random forest on every fold of kf, printing the
# predictions, the true labels and one summary line per fold.
for train_index, test_index in kf:
    x_train, y_train = X[train_index], Y[train_index]
    x_test, y_test = X[test_index], Y[test_index]

    clf = RandomForestClassifier(n_estimators=6)
    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)
    print(pred)
    print(y_test)

    # Accuracy and F1 are defined on the hard (thresholded) predictions.
    Accuracy = metrics.accuracy_score(y_test, pred)
    F1_score = metrics.f1_score(y_test, pred)
    # ROC AUC needs a continuous score so the threshold can be swept; feeding
    # it the 0/1 predictions only evaluates a single operating point.
    # Column 1 of predict_proba is the probability of clf.classes_[1].
    AUC_ROC = metrics.roc_auc_score(y_test, clf.predict_proba(x_test)[:, 1])
    print("RandomForestClassifier", Accuracy, F1_score, AUC_ROC)

输出

输出太长,每一折打印的预测结果(pred)和真实标签(y_test)就不放上来了,下面只保留各折的评分结果。

GaussianNB 0.94 0.9454545454545454 0.9380032206119162
GaussianNB 0.87 0.8737864077669902 0.8725961538461539
GaussianNB 0.95 0.9504950495049506 0.9505802320928372
GaussianNB 0.97 0.9739130434782608 0.9679722562219502
GaussianNB 0.93 0.9320388349514563 0.9297719087635055
GaussianNB 0.9 0.8979591836734694 0.9020473705339221
GaussianNB 0.91 0.8831168831168832 0.9241758241758242
GaussianNB 0.93 0.94017094017094 0.9265188834154352
GaussianNB 0.94 0.9375000000000001 0.9409875551987155
GaussianNB 0.96 0.9649122807017544 0.9555555555555555
svc 0.96 0.9622641509433962 0.961352657004831
svc 0.93 0.9292929292929293 0.9310897435897436
svc 0.93 0.9292929292929293 0.9301720688275311
svc 0.95 0.9565217391304347 0.9475724194206445
svc 0.95 0.9494949494949494 0.950580232092837
svc 0.95 0.9462365591397849 0.9492171818546769
svc 0.93 0.9066666666666667 0.9395604395604396
svc 0.93 0.94017094017094 0.9265188834154352
svc 0.96 0.9574468085106385 0.9598554797270173
svc 0.97 0.9724770642201834 0.9707070707070706
RandomForestClassifier 0.96 0.9615384615384615 0.962962962962963
RandomForestClassifier 0.94 0.9387755102040817 0.9407051282051283
RandomForestClassifier 0.95 0.9484536082474228 0.9497799119647861
RandomForestClassifier 0.95 0.9557522123893805 0.9504283965728273
RandomForestClassifier 0.93 0.9278350515463919 0.9309723889555822
RandomForestClassifier 0.95 0.9462365591397849 0.9492171818546769
RandomForestClassifier 0.97 0.9577464788732395 0.9703296703296703
RandomForestClassifier 0.94 0.9473684210526316 0.9417077175697864
RandomForestClassifier 0.97 0.967741935483871 0.9692894419911683
RandomForestClassifier 0.96 0.9629629629629629 0.9616161616161615

每行从左到右依次是:模型名称、Accuracy、F1_score、AUC_ROC;每个模型各有 10 行,对应 10 折交叉验证中的每一折。
总体上来说,随机森林算法和 SVC 的表现差不多,而 GaussianNB 相对较弱。

总体来说,这次作业相对比较简单:源码可以在老师的 pdf 上找到,需要自己手动查找的内容也比较少。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值