from sklearn.datasets import load_iris,fetch_20newsgroups,load_boston
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd
from sklearn.naive_bayes import MultinomialNB #朴素贝叶斯算法
def process_news(*, test_size=0.25, alpha=1.0, random_state=None) -> None:
    """Classify the 20 Newsgroups corpus with TF-IDF + Multinomial Naive Bayes.

    Loads the full corpus via ``fetch_20newsgroups(subset="all")``, splits it
    into training and test portions, vectorizes the documents with TF-IDF,
    fits a ``MultinomialNB`` model and prints the vocabulary, the training
    matrix, the predicted labels and the accuracy on the test portion.

    Args:
        test_size: Share of the corpus held out for testing; forwarded to
            ``train_test_split``.
        alpha: Smoothing parameter forwarded to ``MultinomialNB``.
        random_state: Seed forwarded to ``train_test_split``. ``None`` keeps
            the split non-deterministic.

    Returns:
        None. All results are written to standard output.
    """
    # Load the whole news corpus (both the "train" and "test" subsets).
    news = fetch_20newsgroups(subset="all")

    # Split the raw documents and their labels into train and test portions.
    x_train, x_test, y_train, y_test = train_test_split(
        news.data,
        news.target,
        test_size=test_size,
        random_state=random_state,
    )

    # Feature extraction: learn the vocabulary and IDF weights on the
    # training documents only.
    tf = TfidfVectorizer()
    x_train = tf.fit_transform(x_train)

    # get_feature_names() no longer exists in newer scikit-learn releases;
    # prefer its replacement and fall back to the old name where the
    # replacement is not available.
    if hasattr(tf, "get_feature_names_out"):
        feature_names = tf.get_feature_names_out()
    else:
        feature_names = tf.get_feature_names()
    print(feature_names)

    # Vectorize the test documents with the vocabulary fitted above. The
    # result must replace the raw test documents: storing it as an attribute
    # on the vectorizer would leave plain strings to be passed to the
    # classifier's predict() and score() below.
    x_test = tf.transform(x_test)

    # Fit the Naive Bayes classifier and evaluate it on the held-out data.
    mlt = MultinomialNB(alpha=alpha)
    print(x_train)
    mlt.fit(x_train, y_train)
    y_predict = mlt.predict(x_test)
    print("预测文章的类型为", y_predict)
    print("准确率:", mlt.score(x_test, y_test))
def main() -> None:
    """Script entry point: run the news classification demo."""
    process_news()
# Only run the demo when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()
# 运行结果 (sample run output):