#-*- coding:utf-8 -*-
# Binomial distribution, multinomial distribution
# Bayes' theorem (the basis of naive Bayes): P(A|B) = P(B|A) * P(A) / P(B)
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# Load the iris dataset and hold out 20% of the samples for testing.
iris = load_iris()
X, y = iris['data'], iris['target']
# random_state is pinned so the train/test split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Naive Bayes with Gaussian feature likelihoods.
gNB = GaussianNB().fit(X_train, y_train)  # fit() returns the fitted estimator
res = gNB.predict(X_test)
# Print the score on the held-out test split.
print(gNB.score(X_test, y_test))
# Naive Bayes with multinomial feature likelihoods.
from sklearn.naive_bayes import MultinomialNB
mNB = MultinomialNB().fit(X_train, y_train)  # fit() returns the fitted estimator
res = mNB.predict(X_test)
# Print the test score, then the training score, one per line.
print(mNB.score(X_test, y_test), mNB.score(X_train, y_train), sep='\n')
# Naive Bayes with Bernoulli (binary) feature likelihoods.
# NOTE(review): BernoulliNB thresholds features at `binarize` (0.0 by default
# per the scikit-learn docs); the iris measurements are presumably all
# positive, so every feature would binarize to 1 -- confirm this model is
# meaningful on this data.
from sklearn.naive_bayes import BernoulliNB
bNB = BernoulliNB().fit(X_train, y_train)  # fit() returns the fitted estimator
res = bNB.predict(X_test)
# Print the test score, then the training score, one per line.
print(bNB.score(X_test, y_test), bNB.score(X_train, y_train), sep='\n')
# Text classification: turn raw messages into TF-IDF feature vectors.
# NOTE(review): X is bound earlier in this script to iris['data']; indexing it
# with 'message' presumably expects a table with a 'message' text column whose
# loading is not visible here -- confirm before running this section.
from sklearn.feature_extraction.text import TfidfVectorizer
tf = TfidfVectorizer()
# Fit the vectorizer on the messages, then vectorize those same messages;
# note that this rebinds X_train.
X_train = tf.fit(X['message']).transform(X['message'])
# naive_bayes
# (scraped page footer) Latest recommended article published 2021-03-20 11:30:18