朴素贝叶斯常用的三种模型:高斯朴素贝叶斯、多项式朴素贝叶斯、伯努利朴素贝叶斯
高斯贝叶斯:假设每个特征在给定类别下的条件概率(似然)服从高斯分布的朴素贝叶斯,适用于连续型特征
多项式贝叶斯:假设特征在给定类别下的条件概率(似然)服从多项式分布的朴素贝叶斯,适用于计数型等非负离散特征(例如词频)
伯努利贝叶斯:假设每个特征在给定类别下的条件概率(似然)服从伯努利分布的朴素贝叶斯,适用于样本特征是二元离散值或者很稀疏的多元离散值的情况
import pandas as pd import numpy as np import matplotlib.pyplot as plt import matplotlib %matplotlib inline matplotlib.rcParams['font.sans-serif'] = ['SimHei'] #显示中文 plt.rcParams['axes.unicode_minus']=False #用来正常显示负号 #分析数据 data = pd.read_csv('010-data_multivar.csv',header=None) dataset_X,dataset_Y = data.iloc[:,:-1],data.iloc[:,-1] dataset_X = dataset_X.values dataset_Y = dataset_Y.values # print(dataset_Y) classes = list(set(dataset_Y))# 去重 # classes = np.unique(dataset_Y) print(classes)
[0, 1, 2, 3]
# Visualize the dataset
def visual_2D_dataset(dataset_X, dataset_y):
    """Show a two-feature dataset as a scatter plot, one series per class.

    Each distinct label in ``dataset_y`` is drawn with its own marker and
    colour and listed in the legend as ``class_<label>``. Styles are assigned
    by cycling through fixed lists in sorted label order, so the figure is
    identical on every run and no two of the first few classes share a style.

    Args:
        dataset_X: array-like of shape (n_samples, 2) holding the features.
        dataset_y: array-like of n_samples class labels aligned with the
            rows of ``dataset_X``.

    Raises:
        AssertionError: if ``dataset_X`` does not have exactly two columns.
    """
    features = np.asarray(dataset_X)
    labels = np.asarray(dataset_y)
    assert features.shape[1] == 2, 'only support dataset with 2 features'

    # The pixel marker ',' and the colour 'w' are left out on purpose: both
    # are practically invisible on the default white background.
    markers = ['.', 'o', 'v', '^', '<', '>', '1', '2', '3', '4', '8',
               's', 'p', '*', 'h', 'H', '+', 'x', 'D', 'd', '|']
    colors = ['b', 'c', 'g', 'k', 'm', 'r', 'y']

    plt.figure()
    # np.unique returns the labels sorted, which keeps the legend order stable.
    for index, class_id in enumerate(np.unique(labels)):
        one_class = features[labels == class_id]
        plt.scatter(one_class[:, 0], one_class[:, 1],
                    marker=markers[index % len(markers)],
                    c=colors[index % len(colors)],
                    label='class_' + str(class_id))
    plt.legend()


visual_2D_dataset(dataset_X, dataset_Y)
# Draw a classifier's decision regions together with the data points
def plot_classifier(classifier, X, y, step_size=0.01):
    """Plot the decision regions of a fitted classifier over two features.

    A dense grid covering the data range (padded by 1.0 on every side) is
    classified with ``classifier.predict``; the predictions are drawn as a
    grey-scale background and the samples are scattered on top, coloured by
    ``y``. The figure is shown before the function returns.

    Args:
        classifier: object exposing ``predict`` that accepts an
            (n_points, 2) array and returns one label per row.
        X: array of shape (n_samples, 2); column 0 is plotted on the x axis
            and column 1 on the y axis.
        y: one value per sample, used to colour the scatter points.
        step_size: spacing of the evaluation grid. Smaller values give
            smoother boundaries at the cost of more predictions. Defaults
            to 0.01, the value that used to be hard-coded.
    """
    first, second = X[:, 0], X[:, 1]

    # Plot range: data extent plus a margin of 1.0 on each side.
    x_min, x_max = first.min() - 1.0, first.max() + 1.0
    y_min, y_max = second.min() - 1.0, second.max() + 1.0

    # Build the grid and classify every grid point.
    x_values, y_values = np.meshgrid(np.arange(x_min, x_max, step_size),
                                     np.arange(y_min, y_max, step_size))
    mesh_output = classifier.predict(np.c_[x_values.ravel(), y_values.ravel()])
    mesh_output = mesh_output.reshape(x_values.shape)

    plt.figure()
    plt.pcolormesh(x_values, y_values, mesh_output, cmap=plt.cm.gray)
    plt.scatter(first, second, c=y, s=80, edgecolors='black',
                linewidth=1, cmap=plt.cm.Paired)

    # Limit the axes to the extent of the grid.
    plt.xlim(x_values.min(), x_values.max())
    plt.ylim(y_values.min(), y_values.max())

    # Put a tick at every whole unit across the data range.
    plt.xticks(np.arange(int(first.min() - 1), int(first.max() + 1), 1.0))
    plt.yticks(np.arange(int(second.min() - 1), int(second.max() + 1), 1.0))
    plt.show()
# Build a Gaussian naive Bayes model
from sklearn.naive_bayes import GaussianNB

gaussian = GaussianNB()
gaussian.fit(dataset_X, dataset_Y)

# Predict on the same samples the model was trained on.
pre_y = gaussian.predict(dataset_X)

# Number of predictions that agree with the true labels.
correct_count = (dataset_Y == pre_y).sum()
print('correct predictions:', correct_count, 'of', len(dataset_Y))

# Colour the points by their TRUE labels, as the other model cells do.
# Colouring by pre_y would always match the background regions and hide
# every misclassified sample.
plot_classifier(gaussian, dataset_X, dataset_Y)
# Multinomial naive Bayes model
from sklearn.naive_bayes import MultinomialNB
# Range scaling
from sklearn.preprocessing import MinMaxScaler

# MultinomialNB needs non-negative features, otherwise it cannot be trained,
# so every feature is rescaled into [0, 1] first.
sclar = MinMaxScaler(feature_range=(0, 1))
dataset = sclar.fit_transform(dataset_X)

mul_nb = MultinomialNB()
mul_nb.fit(dataset, dataset_Y)

# np.c_[x, y] joins column-wise; np.r_[x, y] joins row-wise.
print(np.c_[dataset, dataset_Y])
# reshape(-1, 2) folds the labels into two columns so they can be stacked
# under the two-column feature matrix; it needs an even number of labels.
print(np.r_[dataset, dataset_Y.reshape(-1, 2)])

# Predict a single (already scaled) sample.
y = mul_nb.predict([[0.51539855, 0.27345309]])
print(y)

y_pre = mul_nb.predict(dataset)
plot_classifier(mul_nb, dataset, dataset_Y)
# Bernoulli naive Bayes
from sklearn.naive_bayes import BernoulliNB

# fit() returns the estimator itself, so construction and training are chained.
bernou = BernoulliNB().fit(dataset_X, dataset_Y)

# Predict on the training samples, then draw the decision regions with the
# points coloured by their true labels.
y_re = bernou.predict(dataset_X)
plot_classifier(bernou, dataset_X, dataset_Y)