Week 15 Homework: sklearn


Train the algorithms

GaussianNB


import numpy as np
from sklearn import datasets, metrics
from sklearn.model_selection import KFold
from sklearn.naive_bayes import GaussianNB

dataset = datasets.make_classification(n_samples=1000, n_features=10,
                                       n_informative=2, n_redundant=2,
                                       n_repeated=0, n_classes=2)
X, y = dataset

# 10-fold cross-validation
kf = KFold(n_splits=10, shuffle=True)

# split the data fold by fold
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Gaussian naive Bayes
    clf = GaussianNB()
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)

    acc = metrics.accuracy_score(y_test, pred)
    f1 = metrics.f1_score(y_test, pred)
    auc = metrics.roc_auc_score(y_test, pred)
    print(acc, f1, auc)  # per-fold scores

The results are as follows:
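For reference, recent scikit-learn versions can express the same evaluation in a few lines with model_selection.cross_validate; a minimal sketch, reusing the X and y defined above:

from sklearn.model_selection import cross_validate
from sklearn.naive_bayes import GaussianNB

# 10-fold CV with the same three metrics in a single call
scores = cross_validate(GaussianNB(), X, y, cv=10,
                        scoring=['accuracy', 'f1', 'roc_auc'])
print(scores['test_accuracy'].mean(),
      scores['test_f1'].mean(),
      scores['test_roc_auc'].mean())

One caveat: the 'roc_auc' scorer uses predicted class probabilities rather than hard labels, so its value can differ slightly from roc_auc_score(y_test, pred) in the loop above.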



SVC (possible C values [1e-02, 1e-01, 1e00, 1e01, 1e02], RBF kernel)

from sklearn.svm import SVC

def rbf_svm(X_train, y_train, X_test, C):
    clf = SVC(C=C, kernel='rbf', class_weight='balanced')
    clf.fit(X_train, y_train)
    return clf.predict(X_test)

# number of samples
n_sam = 1000

dataset = datasets.make_classification(n_samples=n_sam, n_features=10,
                                       n_informative=2, n_redundant=2,
                                       n_repeated=0, n_classes=2)
X, y = dataset

# outer 10-fold cross-validation
kf = KFold(n_splits=10, shuffle=True)

accuracy = []
f1 = []
auc_roc = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    bestC = None
    Cvalues = [1e-2, 1e-1, 1e0, 1e1, 1e2]
    innerscore = []

    # train with each candidate value of C
    for C in Cvalues:
        # inner 5-fold cross-validation
        ikf = KFold(n_splits=5, shuffle=True, random_state=5678)
        innerf1 = []
        for t_index, v_index in ikf.split(X_train):
            X_t, X_v = X_train[t_index], X_train[v_index]
            y_t, y_v = y_train[t_index], y_train[v_index]

            ipred = rbf_svm(X_t, y_t, X_v, C)

            # keep the F1 score on the validation fold
            innerf1.append(metrics.f1_score(y_v, ipred))

        innerscore.append(sum(innerf1) / len(innerf1))

    # pick the C that gave the best mean inner F1 score
    bestC = Cvalues[np.argmax(innerscore)]

    # predict on the outer test fold with the chosen C
    pred = rbf_svm(X_train, y_train, X_test, bestC)

    accuracy.append(metrics.accuracy_score(y_test, pred))
    f1.append(metrics.f1_score(y_test, pred))
    auc_roc.append(metrics.roc_auc_score(y_test, pred))

The results are as follows:
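The hand-written nested loop above can also be expressed by wrapping a GridSearchCV (inner 5-fold search over C) in cross_val_score (outer 10-fold evaluation); a minimal sketch, not part of the original assignment, reusing X and y from above:

from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.svm import SVC

# inner 5-fold grid search over C, scored by F1
search = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'),
                      param_grid={'C': [1e-2, 1e-1, 1e0, 1e1, 1e2]},
                      scoring='f1', cv=5)
# outer 10-fold evaluation of the tuned model
outer_f1 = cross_val_score(search, X, y, scoring='f1', cv=10)
print(outer_f1.mean())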


RandomForestClassifier (possible n_estimators values [10, 100, 1000])

from sklearn.ensemble import RandomForestClassifier

# build a classifier with the given number of trees
def rf_predict(X_train, y_train, X_test, nest):
    clf = RandomForestClassifier(n_estimators=nest)
    clf.fit(X_train, y_train)
    return clf.predict(X_test)

# number of samples
n_sam = 1000

dataset = datasets.make_classification(n_samples=n_sam, n_features=10,
                                       n_informative=2, n_redundant=2,
                                       n_repeated=0, n_classes=2)
X, y = dataset

# outer 10-fold cross-validation
kf = KFold(n_splits=10, shuffle=True)

accuracy = []
f1 = []
auc_roc = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    bestN = None
    N_est = [10, 100, 1000]
    innerscore = []

    # train with each candidate n_estimators
    for nest in N_est:
        # inner 5-fold cross-validation
        ikf = KFold(n_splits=5, shuffle=True, random_state=5678)
        innerf1 = []
        for t_index, v_index in ikf.split(X_train):
            X_t, X_v = X_train[t_index], X_train[v_index]
            y_t, y_v = y_train[t_index], y_train[v_index]

            ipred = rf_predict(X_t, y_t, X_v, nest)

            innerf1.append(metrics.f1_score(y_v, ipred))

        innerscore.append(sum(innerf1) / len(innerf1))

    # pick the n_estimators with the best mean inner F1 score
    bestN = N_est[np.argmax(innerscore)]

    # predict on the outer test fold with the chosen n_estimators
    pred = rf_predict(X_train, y_train, X_test, bestN)

    accuracy.append(metrics.accuracy_score(y_test, pred))
    f1.append(metrics.f1_score(y_test, pred))
    auc_roc.append(metrics.roc_auc_score(y_test, pred))

The results are as follows:
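As an alternative to the inner 5-fold loop, a random forest can estimate validation accuracy for free from its out-of-bag samples, since each tree only sees a bootstrap sample of the training rows; a minimal sketch of tuning n_estimators this way (an alternative idea, not the method used above), reusing X and y:

from sklearn.ensemble import RandomForestClassifier

# compare candidate forest sizes by out-of-bag accuracy
for nest in [10, 100, 1000]:
    clf = RandomForestClassifier(n_estimators=nest, oob_score=True)
    clf.fit(X, y)
    print(nest, clf.oob_score_)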



Conclusion: judging from these results, the random forest classifier performs best of the three.
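To make such a comparison concrete, one convenient habit is to summarize each classifier's per-fold scores with a mean and standard deviation; a minimal sketch (the call in the comment assumes the f1 list from one of the experiments above is still in scope):

import numpy as np

def summarize(name, scores):
    # report the mean and spread of one classifier's per-fold scores
    print('%s: mean F1 %.3f, std %.3f' % (name, np.mean(scores), np.std(scores)))

# e.g. summarize('RandomForest', f1) right after the random-forest loop,
# before the score lists are reused by another experiment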
