第二十次作业

(这次作业其实我不是很懂,代码可能有些问题,如果有同学来参观的话,还请多多包涵 :))

作业:



代码:

主要想法:先生成一系列数据,再把三种算法中可能的参数(C values和n_estimators)都测试一遍,再取所有性能评价的均值。

from sklearn import datasets
from sklearn import cross_validation
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
# Performance evaluation: score one fold's predictions.
def performance_evaluate(y_test, pred):
    """Return [accuracy, f1, roc_auc] comparing *pred* against *y_test*."""
    scorers = (
        metrics.accuracy_score,
        metrics.f1_score,
        metrics.roc_auc_score,
    )
    # Same order as the accumulators used by the caller: [acc, f1, auc].
    return [score(y_test, pred) for score in scorers]
# Average the accumulated scores over the folds and print them.
def evaluation_mean(eva, kf_len):
    """Print per-fold means of the accumulated [accuracy, f1, auc] sums.

    *eva* holds the summed scores across folds; *kf_len* is the fold count.
    """
    labels = (
        "accuracy_score's mean:",
        "f1_score's mean:",
        "roc_auc_score's mean:",
    )
    for label, total in zip(labels, eva):
        print(label, total / kf_len)

# --- Data generation -------------------------------------------------------
# 1000 samples, 10 features; make_classification yields a binary target,
# so f1_score / roc_auc_score below work without an `average=` argument.
X, y = datasets.make_classification(n_samples=1000, n_features=10)

# NOTE(review): sklearn.cross_validation was deprecated in 0.18 and removed
# in 0.20 -- the stable API is sklearn.model_selection.KFold, which takes
# n_splits and is iterated via kf.split(X) instead of being iterable itself.
from sklearn.model_selection import KFold
kf = KFold(n_splits=10, shuffle=True)

# Hyper-parameter candidates to sweep.
C = [1e-02, 1e-01, 1e00, 1e01, 1e02]
n_estimators_val = [10, 100, 1000]

# Accumulators of [accuracy, f1, auc] sums across folds; averaged at the end.
NB_eva = [0, 0, 0]
SVM_eva = [[0, 0, 0] for _ in C]
RF_eva = [[0, 0, 0] for _ in n_estimators_val]

# --- Cross-validated evaluation --------------------------------------------
for train_index, test_index in kf.split(X):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    # Naive Bayes (no hyper-parameter to sweep).
    clf = GaussianNB()
    clf.fit(X_train, y_train)
    scores = performance_evaluate(y_test, clf.predict(X_test))
    for j in range(3):
        NB_eva[j] += scores[j]

    # SVM with an RBF kernel, one run per candidate C.
    for i, c_val in enumerate(C):
        clf = SVC(C=c_val, kernel='rbf', gamma=0.1)
        clf.fit(X_train, y_train)
        scores = performance_evaluate(y_test, clf.predict(X_test))
        for j in range(3):
            SVM_eva[i][j] += scores[j]

    # Random Forest, one run per candidate n_estimators.
    for i, n_trees in enumerate(n_estimators_val):
        clf = RandomForestClassifier(n_estimators=n_trees)
        clf.fit(X_train, y_train)
        scores = performance_evaluate(y_test, clf.predict(X_test))
        for j in range(3):
            RF_eva[i][j] += scores[j]

# --- Report fold-averaged scores -------------------------------------------
kf_len = kf.get_n_splits(X)

# NB evaluation
print('## NB evaluation')
evaluation_mean(NB_eva, kf_len)

# SVM evaluation
print('## SVM evaluation')
for i, c_val in enumerate(C):
    print('# C value:', c_val)
    evaluation_mean(SVM_eva[i], kf_len)

# RF evaluation
print('## RF evaluation')
for i, n_trees in enumerate(n_estimators_val):
    print('# n_estimators:', n_trees)
    evaluation_mean(RF_eva[i], kf_len)

需要等待较长时间,才能出结果:

## NB evaluation
accuracy_score's mean: 0.874
f1_score's mean: 0.875494952449
roc_auc_score's mean: 0.874779982475
## SVM evaluation
# C value: 0.01
accuracy_score's mean: 0.857
f1_score's mean: 0.863186539937
roc_auc_score's mean: 0.861490828985
# C value: 0.1
accuracy_score's mean: 0.895
f1_score's mean: 0.899131421069
roc_auc_score's mean: 0.89547911991
# C value: 1.0
accuracy_score's mean: 0.905
f1_score's mean: 0.907935908659
roc_auc_score's mean: 0.905554637638
# C value: 10.0
accuracy_score's mean: 0.897
f1_score's mean: 0.898372255901
roc_auc_score's mean: 0.897577641232
# C value: 100.0
accuracy_score's mean: 0.829
f1_score's mean: 0.830601043853
roc_auc_score's mean: 0.829781034399
## RF evaluation
# n_estimators: 10
accuracy_score's mean: 0.9
f1_score's mean: 0.901195357409
roc_auc_score's mean: 0.900096300739
# n_estimators: 100
accuracy_score's mean: 0.914
f1_score's mean: 0.916437174175
roc_auc_score's mean: 0.914618139561
# n_estimators: 1000
accuracy_score's mean: 0.915
f1_score's mean: 0.917664046544
roc_auc_score's mean: 0.915546729176

Process finished with exit code 0

分析:可以看到,Random Forest 的效果比较好,SVM 次之,NB 最差。

在 Random Forest 中,当 n_estimators 为 1000 时,效果最好;

而 SVM 则是当 C=1.0 时,效果最好。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值