# 朴素贝叶斯算法
# coding=utf-8
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
# Emit a 100-character row of asterisks as a visual separator at start-up.
print("*" * 100)
def naviebayes(*, test_size=0.25, alpha=1.0, random_state=None):
    """Train and evaluate a multinomial naive Bayes classifier on 20 Newsgroups.

    Fetches the complete 20 Newsgroups corpus (``subset='all'``), splits it
    into training and test portions, turns the raw documents into TF-IDF
    features, fits a ``MultinomialNB`` model and prints, in order: the
    learned vocabulary, the training feature matrix, the predicted test
    labels and the score reported by ``MultinomialNB.score`` on the test set.

    Args:
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
            Defaults to 0.25.
        alpha: Forwarded to ``MultinomialNB`` as its ``alpha`` smoothing
            parameter. Defaults to 1.0.
        random_state: Forwarded to ``train_test_split``; pass an integer to
            make the split reproducible. The default ``None`` keeps the
            split random on every call.

    Returns:
        None. All results are written to standard output.
    """
    news = fetch_20newsgroups(subset='all')

    # Split the corpus into training and test sets.
    x_train, x_test, y_train, y_test = train_test_split(
        news.data, news.target, test_size=test_size, random_state=random_state
    )

    # Extract TF-IDF features; the vocabulary is learned from the training
    # documents only and then reused to transform the test documents.
    tf = TfidfVectorizer()
    x_train = tf.fit_transform(x_train)

    # ``get_feature_names`` was removed in scikit-learn 1.2; prefer the
    # replacement and fall back only on old releases that lack it.
    if hasattr(tf, "get_feature_names_out"):
        feature_names = list(tf.get_feature_names_out())
    else:
        feature_names = tf.get_feature_names()
    print(feature_names)

    x_test = tf.transform(x_test)

    # Naive Bayes classifier.
    mlt = MultinomialNB(alpha=alpha)

    # Print the sparse matrix as-is: calling ``toarray()`` here would
    # allocate a dense documents-by-vocabulary array, which can exhaust
    # memory on the full corpus.
    print(x_train)

    mlt.fit(x_train, y_train)

    y_predict = mlt.predict(x_test)
    print(y_predict)

    sc = mlt.score(x_test, y_test)
    print(sc)

    return None
if __name__ == "__main__":
    # Run the demo only when this file is executed as a script,
    # not when it is imported as a module.
    naviebayes()