对多项式朴素贝叶斯MNB做十折交叉验证,求平均准确率。
加载数据集data:
import pandas as pd
import numpy as np
# Load the dataset from the sparse ARFF file "fbis.wc.arff".
# NOTE(review): read_sparse_arrf is not defined or imported in this section;
# it presumably returns a pandas DataFrame (the result is indexed with .iloc
# further down) - confirm against its definition.
data = read_sparse_arrf("fbis.wc.arff")
取特征矩阵(除最后一列外的所有列)与类标签(最后一列):
# Features are every column except the last one; the last column is the class label.
x = data.iloc[:, :-1]
y = data.iloc[:, -1]
# Convert both to NumPy arrays so rows can be selected with integer index arrays.
df, label = np.array(x), np.array(y)
使用MNB模型训练,并做十折交叉验证:
import time
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.naive_bayes import MultinomialNB #使用多项式朴素贝叶斯模型
# Run 10-fold cross-validation of Multinomial Naive Bayes on (df, label), then
# report the mean / best / worst fold accuracy and the elapsed wall-clock time.
start_time = time.time()  # start of the timed section

# n_splits is the number of folds. shuffle=True without a random_state means
# the fold assignment, and therefore the scores, vary from run to run.
kf = KFold(n_splits=10, shuffle=True)
scores = []  # held-out accuracy, one entry per fold

for train_index, test_index in kf.split(df):
    x_train, x_test = df[train_index], df[test_index]
    y_train, y_test = label[train_index], label[test_index]

    # A fresh model is fitted for every fold.
    model = MultinomialNB()
    model.fit(x_train, y_train)  # train on the other nine folds
    score = model.score(x_test, y_test)  # mean accuracy on the held-out fold
    scores.append(score)

mean_score = np.mean(scores)  # average accuracy over all folds
# Prints the mean, the maximum and the minimum fold accuracy (not each fold).
print( mean_score,np.max(scores),np.min(scores))
end_time = time.time()  # end of the timed section
run_time = end_time - start_time  # elapsed time in seconds
print("Time:", run_time)