机器学习实验:朴素贝叶斯算法实现对iris鸢尾花数据集的分类
实验内容:
- 通过sklearn提供的贝叶斯算法模型实现对iris鸢尾花数据集的分类及预测
- 使用PCA将特征降至二维,并通过matplotlib绘制决策边界
代码如下:
-
导入相关库
# Numerical / data-handling libraries.
import numpy as np
import pandas as pd

# Plotting.
import matplotlib as mpl
import matplotlib.colors as colors
import matplotlib.pyplot as plt

# scikit-learn: datasets, preprocessing, model selection, models and metrics.
import sklearn.metrics as metrics
import sklearn.naive_bayes as bayes
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import load_iris
from sklearn.datasets import load_wine
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize

# Use the SimHei font for sans-serif text (presumably so that Chinese
# characters in plot labels render correctly).
mpl.rcParams['font.sans-serif'] = ['simHei']
# Draw the minus sign as an ASCII hyphen instead of the Unicode minus glyph.
mpl.rcParams['axes.unicode_minus'] = False
-
导入数据集,并对数据集进行处理
# Load the iris dataset and separate the feature matrix from the labels.
iris = load_iris()
X_iris, Y_iris = iris.data, iris.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
(
    train_x_iris,
    test_x_iris,
    train_y_iris,
    test_y_iris,
) = train_test_split(X_iris, Y_iris, test_size=0.3, random_state=2)
# Shapes recorded by the author: train_x_iris is (105, 4), test_x_iris is (45, 4).
-
实现绘图函数
def drawing(train_x, test_x, train_y, test_y, Name='Gaussian', N=500):
    """Plot decision regions and a micro-averaged ROC curve for a naive Bayes model.

    The features are projected onto their first two principal components,
    the selected naive Bayes classifier is trained on the projected training
    data, and a two-panel figure is shown:

    * top: the predicted class over an ``N`` x ``N`` grid spanning the
      projected data, with training and test samples drawn on top; the title
      is ``Name`` followed by the test accuracy;
    * bottom: the ROC curve computed over the flattened one-vs-rest label
      matrix and predicted probabilities; the title carries the AUC.

    Args:
        train_x: Training feature matrix, one row per sample.
        test_x: Test feature matrix with the same columns as ``train_x``.
        train_y: Class labels for ``train_x``.
        test_y: Class labels for ``test_x``.
        Name: ``'Gaussian'``, ``'Bernoulli'`` or ``'Multinomial'``. Any other
            value falls back to ``GaussianNB``.
        N: Number of grid points along each axis of the decision-region plot.

    Raises:
        ValueError: Propagated from scikit-learn. In particular MultinomialNB
            rejects negative feature values, which a PCA projection normally
            contains, so ``Name='Multinomial'`` is expected to fail here.
    """
    model_factories = {
        'Gaussian': bayes.GaussianNB,
        'Bernoulli': bayes.BernoulliNB,
        'Multinomial': bayes.MultinomialNB,
    }
    # Unknown names fall back to the Gaussian model.
    model = model_factories.get(Name, bayes.GaussianNB)()

    # Reduce to two dimensions so the decision regions can be drawn. The
    # projection is learned from the training data only and then re-used for
    # the test data; fitting a second PCA on the test set would place the two
    # sets in different coordinate systems.
    pca = PCA(n_components=2)
    train_x = pca.fit_transform(train_x)
    test_x = pca.transform(test_x)

    model.fit(train_x, train_y)

    # Build a grid covering the bounding box of all projected samples.
    all_points = np.vstack((train_x, test_x))
    x1min, x2min = all_points.min(axis=0)
    x1max, x2max = all_points.max(axis=0)
    x1, x2 = np.meshgrid(
        np.linspace(x1min, x1max, N),
        np.linspace(x2min, x2max, N),
    )
    grid = np.c_[x1.ravel(), x2.ravel()]
    y_hat = model.predict(grid)

    plt.figure()

    # Top panel: decision regions with the samples overlaid.
    plt.subplot(2, 1, 1)
    region_cmap = colors.ListedColormap(
        ['indigo', 'gold', 'hotpink', 'firebrick', 'indianred']
    )
    plt.pcolormesh(x1, x2, y_hat.reshape(x1.shape), shading='auto', cmap=region_cmap)
    plt.scatter(train_x[:, 0], train_x[:, 1], c=train_y)
    plt.scatter(test_x[:, 0], test_x[:, 1], c=test_y)
    plt.title(Name + ":" + str(model.score(test_x, test_y)))

    # Bottom panel: ROC over the flattened one-vs-rest indicator matrix.
    plt.subplot(2, 1, 2)
    # Use the classes the model actually learned so the indicator columns
    # line up with the columns of predict_proba.
    label = label_binarize(test_y, classes=model.classes_)
    if label.shape[1] == 1:
        # label_binarize returns a single column for two-class problems;
        # expand it to two columns to match predict_proba.
        label = np.hstack((1 - label, label))
    fpr, tpr, _thresholds = metrics.roc_curve(
        label.ravel(), model.predict_proba(test_x).ravel()
    )
    plt.plot(fpr, tpr, c="red", label="ROC")
    plt.title("AUC" + str(metrics.auc(fpr, tpr)))
    plt.legend()

    plt.tight_layout()
    plt.show()
-
对数据集进行分类及预测
# Train three naive Bayes variants on the raw (un-projected) iris features,
# predict the held-out samples and report each model's test accuracy.

# Gaussian naive Bayes.
gaussian = bayes.GaussianNB()
gaussian.fit(train_x_iris, train_y_iris)
test_y_gaussianHat_iris = gaussian.predict(test_x_iris)
score_gaussian = gaussian.score(test_x_iris, test_y_iris)

# Bernoulli naive Bayes.
bernoulli = bayes.BernoulliNB()
bernoulli.fit(train_x_iris, train_y_iris)
test_y_bernoulliHat_iris = bernoulli.predict(test_x_iris)
score_bernoulli = bernoulli.score(test_x_iris, test_y_iris)

# Multinomial naive Bayes. NOTE: MultinomialNB cannot be used on input that
# contains negative values; the author recorded a score for this fit on the
# raw features, while the PCA-projected data used for plotting is reported
# to trigger that error.
multionmial = bayes.MultinomialNB()
multionmial.fit(train_x_iris, train_y_iris)
test_y_multionmialHat_iris = multionmial.predict(test_x_iris)
score_multionmial = multionmial.score(test_x_iris, test_y_iris)

# Report the test accuracy of every model.
print(f"gaussian score:{score_gaussian}")
print(f"bernoulli score:{score_bernoulli}")
print(f"multionmial score:{score_multionmial}")
# Output recorded by the author:
#   gaussian score:0.9777777777777777
#   bernoulli score:0.28888888888888886
#   multionmial score:0.9111111111111111
-
调用绘图函数,绘制决策边界
# Draw the decision regions and ROC curve for each model that can be plotted.
# 'Multinomial' is deliberately left out: the author reports that it fails
# with "Negative values in data passed to MultinomialNB (input X)", probably
# because the dimensionality-reduced data contains negative values.
for model_name in ('Gaussian', 'Bernoulli'):
    drawing(train_x_iris, test_x_iris, train_y_iris, test_y_iris, model_name)
结果如下: