Python 贝叶斯分类器 GaussianNB

运行环境:Windows 10 64位,Python 3.6,PyCharm 2018.1.1
from sklearn import datasets,cross_validation,naive_bayes
import numpy as np
import matplotlib.pyplot as plt
# Inspect the digits dataset
def show_digits():
    """Print the first sample's feature vector and plot the first 25 digit images.

    Loads the scikit-learn digits dataset, prints ``data[0]`` and draws the
    first 25 entries of ``images`` on a 5x5 grid of subplots before showing
    the figure.
    """
    dataset = datasets.load_digits()
    figure = plt.figure()
    print('vector from  images 0:',dataset.data[0])
    # Subplot positions are 1-based, hence start=1.
    for position, image in enumerate(dataset.images[:25], start=1):
        axes = figure.add_subplot(5, 5, position)
        axes.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    plt.show()
# Runs at import time: prints the first feature vector and displays the sample grid.
show_digits()
# Load the dataset
def load_data():
    """Load the digits dataset and split it into training and test subsets.

    Returns:
        The sequence ``X_train, X_test, y_train, y_test`` produced by
        ``train_test_split`` with 25% of the samples held out for testing
        and ``random_state=0`` so the split is reproducible.
    """
    # sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed
    # in 0.20; sklearn.model_selection offers the same train_test_split.
    # NOTE(review): the file-level ``from sklearn import cross_validation``
    # still has to be dropped before this file imports on scikit-learn >= 0.20.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:  # scikit-learn < 0.18
        train_test_split = cross_validation.train_test_split

    digits = datasets.load_digits()
    return train_test_split(digits.data, digits.target, test_size=0.25, random_state=0)
# Gaussian naive Bayes classifier
def test_GaussianNB(*data):
    """Fit a GaussianNB model and print its score on the training and test sets.

    Args:
        *data: Exactly four positional values, in this order:
            ``X_train, X_test, y_train, y_test``.

    Raises:
        ValueError: If ``data`` does not contain exactly four items
            (raised by the tuple unpacking).
    """
    X_train, X_test, y_train, y_test = data
    cls = naive_bayes.GaussianNB()
    cls.fit(X_train, y_train)
    # Label previously read 'Traing score' (typo).
    print('Training score : %.2f' % cls.score(X_train, y_train))
    print('Testing score : %.2f' % cls.score(X_test, y_test))
# Split the digits data and report the Gaussian naive Bayes scores.
X_train, X_test, y_train, y_test = load_data()
test_GaussianNB(X_train, X_test, y_train, y_test)
# Multinomial naive Bayes classifier
def test_MultinomialNB(*data):
    """Fit a MultinomialNB model and print its score on the training and test sets.

    Args:
        *data: Exactly four positional values, in this order:
            ``X_train, X_test, y_train, y_test``.
    """
    train_X, test_X, train_y, test_y = data
    model = naive_bayes.MultinomialNB()
    model.fit(train_X, train_y)
    # Training score is evaluated and printed first, then the testing score.
    for caption, features, labels in (
        ("Training score:%.2f", train_X, train_y),
        ("Testing score:%.2f", test_X, test_y),
    ):
        print(caption % model.score(features, labels))
# Split the digits data and report the multinomial naive Bayes scores.
X_train, X_test, y_train, y_test = load_data()
test_MultinomialNB(X_train, X_test, y_train, y_test)
# Examine how different alpha values affect the multinomial naive Bayes classifier
def test_MultinomialNB_alpha(*data):
    """Plot MultinomialNB training/testing scores against the ``alpha`` parameter.

    A fresh model is fitted for each of 200 logarithmically spaced alpha values
    between 1e-2 and 1e5, and both score curves are drawn on a log-scaled x axis.

    Args:
        *data: Exactly four positional values, in this order:
            ``X_train, X_test, y_train, y_test``.
    """
    X_train, X_test, y_train, y_test = data
    alphas = np.logspace(-2, 5, num=200)
    train_scores = []
    test_scores = []
    for alpha in alphas:
        cls = naive_bayes.MultinomialNB(alpha=alpha)
        cls.fit(X_train, y_train)
        train_scores.append(cls.score(X_train, y_train))
        test_scores.append(cls.score(X_test, y_test))

    # Plot
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(alphas, train_scores, label='Training Score')
    ax.plot(alphas, test_scores, label='Testing Score')
    ax.set_xlabel(r'$\alpha$')
    ax.set_ylabel('score')
    ax.set_ylim(0, 1.0)
    ax.set_title('MultinomialNB')
    ax.set_xscale('log')
    # Without a legend the two labelled curves cannot be told apart.
    ax.legend(loc='best')
    plt.show()
# Split the digits data and plot the multinomial naive Bayes scores over alpha.
X_train, X_test, y_train, y_test = load_data()
test_MultinomialNB_alpha(X_train, X_test, y_train, y_test)

这里写图片描述

# Bernoulli naive Bayes classifier
def test_BernoulliNB(*data):
    """Fit a BernoulliNB model and print its score on the training and test sets.

    Args:
        *data: Exactly four positional values, in this order:
            ``X_train, X_test, y_train, y_test``.
    """
    train_X, test_X, train_y, test_y = data
    model = naive_bayes.BernoulliNB()
    model.fit(train_X, train_y)

    train_score = model.score(train_X, train_y)
    print("Training score:%.2f" % train_score)

    test_score = model.score(test_X, test_y)
    print("Testing score:%.2f" % test_score)
# Split the digits data and report the Bernoulli naive Bayes scores.
X_train, X_test, y_train, y_test = load_data()
test_BernoulliNB(X_train, X_test, y_train, y_test)
# Examine how different alpha values affect the Bernoulli naive Bayes classifier
def test_BernoulliNB_alpha(*data):
    """Plot BernoulliNB training/testing scores against the ``alpha`` parameter.

    A fresh model is fitted for each of 200 logarithmically spaced alpha values
    between 1e-2 and 1e5, and both score curves are drawn on a log-scaled x axis.

    Args:
        *data: Exactly four positional values, in this order:
            ``X_train, X_test, y_train, y_test``.
    """
    train_X, test_X, train_y, test_y = data
    alpha_grid = np.logspace(-2, 5, num=200)

    def evaluate(alpha):
        # Fit one model and return its (training score, testing score).
        model = naive_bayes.BernoulliNB(alpha=alpha)
        model.fit(train_X, train_y)
        return model.score(train_X, train_y), model.score(test_X, test_y)

    score_pairs = [evaluate(alpha) for alpha in alpha_grid]
    train_curve = [pair[0] for pair in score_pairs]
    test_curve = [pair[1] for pair in score_pairs]

    # Plot
    axes = plt.figure().add_subplot(1, 1, 1)
    axes.plot(alpha_grid, train_curve, label='Training Score')
    axes.plot(alpha_grid, test_curve, label='Testing Score')
    axes.set_xlabel(r'$\alpha$')
    axes.set_ylabel('score')
    axes.set_ylim(0, 1.0)
    axes.set_title('BernoulliNB')
    axes.set_xscale('log')
    axes.legend(loc='best')
    plt.show()
# Split the digits data and plot the Bernoulli naive Bayes scores over alpha.
X_train, X_test, y_train, y_test = load_data()
test_BernoulliNB_alpha(X_train, X_test, y_train, y_test)

这里写图片描述

# Examine how the binarize parameter affects the Bernoulli naive Bayes classifier
def test_BernoulliNB_binarize(*data):
    """Plot BernoulliNB training/testing scores against the ``binarize`` parameter.

    The parameter is swept over 100 evenly spaced values ranging from 0.1 below
    the smallest feature value to 0.1 above the largest one, taken over both
    the training and the test features.

    Args:
        *data: Exactly four positional values, in this order:
            ``X_train, X_test, y_train, y_test``.
    """
    train_X, test_X, train_y, test_y = data
    feature_sets = (train_X, test_X)
    lower = min(np.min(part.ravel()) for part in feature_sets) - 0.1
    upper = max(np.max(part.ravel()) for part in feature_sets) + 0.1
    thresholds = np.linspace(lower, upper, num=100, endpoint=True)

    train_curve, test_curve = [], []
    for threshold in thresholds:
        model = naive_bayes.BernoulliNB(binarize=threshold)
        model.fit(train_X, train_y)
        train_curve.append(model.score(train_X, train_y))
        test_curve.append(model.score(test_X, test_y))

    # Plot
    axes = plt.figure().add_subplot(1, 1, 1)
    axes.plot(thresholds, train_curve, label='Training Score')
    axes.plot(thresholds, test_curve, label='Testing Score')
    axes.set_xlabel('binarize')
    axes.set_ylabel('score')
    axes.set_ylim(0, 1.0)
    # Leave one unit of margin on each side of the swept range.
    axes.set_xlim(lower - 1, upper + 1)
    axes.set_title('BernoulliNB')
    axes.legend(loc='best')
    plt.show()
# Split the digits data and plot the Bernoulli naive Bayes scores over binarize.
X_train, X_test, y_train, y_test = load_data()
test_BernoulliNB_binarize(X_train, X_test, y_train, y_test)

这里写图片描述

阅读更多
版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/dingming001/article/details/80686600
想对作者说点什么? 我来说一句

没有更多推荐了,返回首页

关闭
关闭
关闭