模型原型
class sklearn.naive_bayes.MultinomialNB(alpha=1.0,fit_prior=True,class_prior=None)
参数
- alpha
- fit_prior
True:从训练数据中学习类别先验概率 $P(y=c_k)$
False:不学习先验概率,使用均匀分布
- class_prior:指定每个分类的先验概率
属性
- class_log_prior_:形状为(n_classes,)的数组对象,给出了每个类别调整后的经验概率分布的对数值
- feature_log_prob_:形状为(n_classes,n_features)的数组对象,给出了 $P(X_j \mid y=c_k)$ 的经验概率分布的对数值
- class_count_:每个类别包含的训练样本数量
- feature_count_:每个类别每个特征遇到的样本数
方法
- fit(X,y[,sample_weight])
- partial_fit(X,y[,classes,sample_weight])
- predict(X)
- predict_log_proba(X)
- predict_proba(X)
- score(X,y[,sample_weight])
from sklearn import datasets,cross_validation,naive_bayes
import numpy as np
import matplotlib.pyplot as plt
显示Digit Dataset数据集
def show_digits():
    """Print the first digit sample's feature vector and plot the first 25 images.

    Loads the digits dataset through ``datasets.load_digits()``, prints
    ``data[0]``, then draws ``images[0]`` .. ``images[24]`` on a 5x5 grid of
    subplots using the reversed gray colormap and shows the figure.
    """
    dataset = datasets.load_digits()
    figure = plt.figure()
    print("vector from images 0:", dataset.data[0])

    # Subplot positions are 1-based, image indices are 0-based.
    for position in range(1, 26):
        axes = figure.add_subplot(5, 5, position)
        axes.imshow(
            dataset.images[position - 1],
            cmap=plt.cm.gray_r,
            interpolation='nearest',
        )

    plt.show()
# Run the demo: print the first sample's feature vector and display the first 25 digit images.
show_digits()
加载数据
def load_data():
    """Load the digits dataset and split it into training and test subsets.

    Returns:
        The value returned by ``train_test_split``, to be unpacked as
        ``X_train, X_test, y_train, y_test``. 25% of the samples are held
        out for testing and ``random_state=0`` keeps the split reproducible.
    """
    # ``sklearn.cross_validation`` was deprecated in scikit-learn 0.18 and
    # removed in 0.20; ``sklearn.model_selection`` provides the replacement.
    # Import locally with a fallback so the function works on old and new
    # releases alike.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split

    digits = datasets.load_digits()
    return train_test_split(
        digits.data, digits.target, test_size=0.25, random_state=0
    )
测试多项式贝叶斯分类器
def test_MultinomialNB(*data):
    """Fit a default ``MultinomialNB`` and print its training and testing scores.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
    """
    X_train, X_test, y_train, y_test = data

    model = naive_bayes.MultinomialNB()
    model.fit(X_train, y_train)

    # Score and report each subset in turn, formatted to two decimals.
    train_score = model.score(X_train, y_train)
    print('Training Score:%.2f' % train_score)
    test_score = model.score(X_test, y_test)
    print('Testing Score:%.2f' % test_score)
# Build the train/test split and report the scores of a MultinomialNB with default parameters.
X_train,X_test,y_train,y_test=load_data()
test_MultinomialNB(X_train,X_test,y_train,y_test)
检验不同的 α 对多项式贝叶斯分类器的预测性能的影响
def test_MultinomialNB_alpha(*data):
    """Plot how the ``alpha`` parameter of ``MultinomialNB`` affects its scores.

    One classifier is fitted for each of 200 ``alpha`` values spaced
    logarithmically between 1e-2 and 1e5. The training and testing scores are
    then drawn as two curves against ``alpha`` on a logarithmic x axis.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
    """
    X_train, X_test, y_train, y_test = data
    alphas = np.logspace(-2, 5, num=200)
    train_scores = []
    test_scores = []
    for alpha in alphas:
        cls = naive_bayes.MultinomialNB(alpha=alpha)
        cls.fit(X_train, y_train)
        train_scores.append(cls.score(X_train, y_train))
        test_scores.append(cls.score(X_test, y_test))

    # Plot both score curves on a shared axis.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(alphas, train_scores, label="Training Score")
    ax.plot(alphas, test_scores, label="Testing Score")
    # The backslash is required for mathtext to render the Greek letter;
    # "$alpha$" would be typeset as the five letters a-l-p-h-a.
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel("score")
    ax.set_ylim(0, 1.0)
    ax.set_title("MultinomialNB")
    ax.set_xscale("log")
    # Without a legend the curve labels above are never displayed.
    ax.legend(loc="best")
    plt.show()
# Rebuild the split (load_data uses a fixed random_state) and plot the scores across the alpha sweep.
X_train,X_test,y_train,y_test=load_data()
test_MultinomialNB_alpha(X_train,X_test,y_train,y_test)