# Explore the text data
from sklearn.datasets import fetch_20newsgroups
# Load the dataset with default arguments; the result is accessed in a
# dict-like way (attributes such as ``target_names`` below).
data = fetch_20newsgroups()

# The different kinds of news, i.e. the names of the label categories.
# NOTE(review): this is a bare expression, so it only displays a value in an
# interactive session / notebook cell; in a plain script it has no output.
data.target_names
import numpy as np
import pandas as pd
# Newsgroup categories to keep for this experiment.
categories = [
    "sci.space",
    "rec.sport.hockey",
    "talk.politics.guns",
    "talk.politics.mideast",
]

# Fetch the "train" and "test" subsets, each restricted to the categories
# selected above.
train = fetch_20newsgroups(subset="train", categories=categories)
test = fetch_20newsgroups(subset="test", categories=categories)