1. 数据集

2. 实现
2.1 代码
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
def bayesian_demo(*, test_size=0.2, random_state=None):
    """Classify the 20 newsgroups texts with multinomial naive Bayes.

    Fetches the dataset with ``subset='all'``, splits it into train and
    test parts, turns the raw documents into TF-IDF features, fits a
    ``MultinomialNB`` estimator and prints the predictions, an
    element-wise comparison with the true labels, and the score on the
    test part.

    :param test_size: forwarded to ``train_test_split``; the default of
        0.2 matches the previously hard-coded value.
    :param random_state: forwarded to ``train_test_split``; the default
        ``None`` keeps the original unseeded split, pass an int to make
        the split repeatable.
    :return: the value of ``estimator.score`` on the test part.
    """
    # 1) Load the data (fetch_20newsgroups may download it on first use).
    news = fetch_20newsgroups(subset='all')

    # 2) Split into training and test sets.
    x_train, x_test, y_train, y_test = train_test_split(
        news.data,
        news.target,
        test_size=test_size,
        random_state=random_state,
    )

    # 3) Feature extraction: the vocabulary and IDF weights are learned
    #    from the training documents only; the test documents are merely
    #    transformed with the already-fitted vectorizer.
    transfer = TfidfVectorizer()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # 4) Fit the naive Bayes estimator.
    estimator = MultinomialNB()
    estimator.fit(x_train, y_train)

    # 5) Evaluate: raw predictions, element-wise comparison, then score.
    y_predict = estimator.predict(x_test)
    print('y_predic:\n', y_predict)
    print('直接对比真实值与预测值:\n', y_test == y_predict)

    score = estimator.score(x_test, y_test)
    print('准确率为:\n', score)
    return score
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    bayesian_demo()
2.2 结果
