一、鸢尾花数据集朴素贝叶斯实现
# 导入算法包以及数据集
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.naive_bayes import MultinomialNB,BernoulliNB,GaussianNB
# Load the iris dataset, then hold out 30% of the samples as a test set.
# random_state is fixed so the split is the same on every run.
iris = load_iris()
x_train, x_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.3,
    random_state=1,
)
# Multinomial naive Bayes: fit on the training split and evaluate on the test split.
mul_nb = MultinomialNB()
mul_nb.fit(x_train, y_train)
# Predict once and reuse the result for both reports.
mul_pred = mul_nb.predict(x_test)
print('多项式模型:')
print('多项式模型准确率', mul_nb.score(x_test, y_test))
# scikit-learn metrics expect (y_true, y_pred). Passing them the other way
# round swaps precision with recall and transposes the confusion matrix.
print(classification_report(y_test, mul_pred))
print(confusion_matrix(y_test, mul_pred))
# Bernoulli naive Bayes: fit on the training split and evaluate on the test split.
# NOTE(review): BernoulliNB binarizes features at its default threshold
# (binarize=0.0). Iris measurements are presumably all positive, so every
# feature would map to 1 - confirm whether a threshold should be set.
ber_nb = BernoulliNB()
ber_nb.fit(x_train, y_train)
# Predict once and reuse the result for both reports.
ber_pred = ber_nb.predict(x_test)
print('伯努利模型:')
print('伯努利模型准确率', ber_nb.score(x_test, y_test))
# scikit-learn metrics expect (y_true, y_pred). Passing them the other way
# round swaps precision with recall and transposes the confusion matrix.
print(classification_report(y_test, ber_pred))
print(confusion_matrix(y_test, ber_pred))
# Gaussian naive Bayes: fit on the training split and evaluate on the test split.
gau_nb = GaussianNB()
gau_nb.fit(x_train, y_train)
# Predict once and reuse the result for both reports.
gau_pred = gau_nb.predict(x_test)
print('高斯模型:')
print('高斯模型准确率', gau_nb.score(x_test, y_test))
# scikit-learn metrics expect (y_true, y_pred). Passing them the other way
# round swaps precision with recall and transposes the confusion matrix.
print(classification_report(y_test, gau_pred))
print(confusion_matrix(y_test, gau_pred))
输出结果为:
由于鸢尾花的特征为花瓣和花萼的长度和宽度,均为连续数值,所以最适用