# sklearn实现多项式朴素贝叶斯

# --*-- coding:utf-8 --*--
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report

# Train and evaluate a multinomial naive Bayes text classifier on the
# 20 Newsgroups corpus, printing the predictions, every misclassified
# test sample, the test-set score and a per-class report.

# Load the complete corpus (subset='all').
news = fetch_20newsgroups(subset='all')
# Report how many documents were loaded.
print('------------------------------')
print('------------------------------')
print('获取邮件数量为:', end='\t')
print(len(news.data))
# Hold out 25% of the documents for testing; the fixed random_state keeps
# the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    news.data, news.target, test_size=0.25, random_state=33)
# CountVectorizer turns raw text into count vectors.
vec = CountVectorizer()
# fit_transform first fits the vectorizer on the training text and then
# converts that text to vectors.
x_train = vec.fit_transform(X_train)
# The vectorizer has already been fitted on the training set, so the test
# set must only be transformed, never fitted again; otherwise the training
# and test vectors would end up with mismatched dimensions.
x_test = vec.transform(X_test)
# Build the multinomial naive Bayes model and train it.
mnb = MultinomialNB()
mnb.fit(x_train, y_train)
# Predict the labels of the held-out documents.
y_predict = mnb.predict(x_test)
# Print the predictions, ten per row.
print('------------------------------')
print('------------------------------')
print('测试集预测结果为:')
# Derive the row boundaries from the actual number of predictions instead
# of a hard-coded test-set size, so a different split still prints fully.
predict_content = [
    y_predict[start:start + 10] for start in range(0, len(y_predict), 10)
]
for row in predict_content:
    print(row)
# Compare predictions with the true labels and print the index of every
# misclassified test sample.
print('-----------------------------')
print('-----------------------------')
for index, (predicted, actual) in enumerate(zip(y_predict, y_test)):
    if predicted != actual:
        print('------------------------------')
        print('预测结果为:', predicted)
        print('实际结果为:', actual)
        print('预测错误的测试集索引为:', index)
# Model evaluation: print the classifier's score on the test set.
# NOTE(review): for scikit-learn classifiers score() is documented as mean
# accuracy, although the printed label calls it "confidence" -- confirm
# before relying on the label.
print('------------------------------')
print('------------------------------')
print('模型置信度为:')
print(mnb.score(x_test, y_test))
# Print the per-class classification report.
print('------------------------------')
print('------------------------------')
print('报告如下:')
print(classification_report(y_test, y_predict, target_names=news.target_names))



D:\anaconda\python.exe C:/Users/pc/Desktop/pypro/myfirst_pro.py
------------------------------
------------------------------

------------------------------
------------------------------

[ 8 15 13 12 18  1  4  9  4  7]
[ 3 12 12 18  1 10 12  7 12  8]
[16 12  6 12  3 15  5  9  0 16]
[ 1  8 13 15 13 16  6 14  3 11]
[ 3 15 15  6  8  1 13  8  0  3]
[ 5 15 16  1  6 15  0  4  6  7]
[ 3  6  4 18  3  1  5 16 16  6]
[ 3 14 13 11  1  3 18  9  8 10]
[ 3 11  6 15  5 19 10 10  6 17]
[ 8 14 18 13  0  3  6  1  8 18]
[12 10  6  8  3  7 10 11  5  9]
[ 8 15 17 17 13 18 16 17  8  1]
[ 5  4 12 13 15 17 15  3  7  3]
[ 1 19  8  9 17  8  7 17  8 17]
[17  5  0 11  9  0  1  8 15  7]
[15  7  4  4 18  5  1 15  4  1]
[13 16 17 15  7 17 14  5  5 17]
[ 4 12 13 15 10 17 14  1 12  1]
[ 9 12  3 10  5  1  3  0 10 16]
[17  3  5 10 18  2 10 10 18 11]
[18 13  8 12 13  0 17 19 10  9]
[10  5  7 17 16  9 10  1  7  4]
[17 16  9  9  5  0 11  1 19 13]
[ 6  1  9 16 16  9 10  8 13  4]
[10 18 11  7  7  4  3  0  0  1]
[14  1 14 16 11  8  1  7 15 10]
[14 12  3 12  3  3 12  6  3 14]
[ 8 18 17  4 14 10  5 12  1 12]
[15 15 11  5  7 14  3  9 16  1]
[15 18  0  4 17 16  0  1 10 15]
[ 3  8  0  7  3 14  4 16 14 11]
[ 8  4 16 13 14 16  1  3  1  4]
[10 16 15  7 11  0 14  7  3 13]
[ 3  8 15  7  3 16  0  4  8  7]
[10 12 11  5  3  9 18 15  7  5]
[11 14  8 16  7 14  3  5 17  8]
[14  6  9  0  3 17  1  0 12 16]
[ 8 13 10  9  5  6  4 18  7  8]
[11  3 13 17  6  6  1  0  9 13]
[10 12 14  9 18 17  5  0  0 16]
[10  3  3 16 14 14 18  3  4 13]
[ 5  7  1 12 13  4 11 11  3 18]
[ 3  3 14 14  0 17 14  1 18  8]
[13  7  7 13 17 18  1 12 18 15]
[ 6  4  1  9  3 11 13  8 17 17]
[12 13 12 16  6  5  3 15  3 11]
[13 18 11  7 13 17  3 10 12 18]
[ 7  9  7 15  4  5  1 10 16 16]
[ 6 13  4 10 14 14 17  5  4 13]
[12 18 10  9  3  3  1  7 17  4]
[16 10 14  1  3 13 16 10  5  5]
[12 18 18 11 10  3  7 12 13 12]
[ 8 12 15 10  6 16  3 16 13 11]
[10  5 11  4  3 18 18 16 13  5]
[11 15 16  5  6 18 12  4 12 17]
[ 4  1 16 13  3 10  5  7  7 11]
[ 5  0  5  3 17  4  9  0 11  9]
[18 11  5  1  4  2 11  9 16  3]
[15  1 12  5  3  0  7 16  5  3]
[ 9 13 13  8 18  4 10  9 10  3]
[12  3 13 11 11  1 16  7  1  4]
[13  1 15 11 10 13 15 16  0 14]
[ 5 13 18 15 10  8  8 18  4 10]
[11 15  6 16 13 11  9  0  1 14]
[ 3  5 19 16 11  4  2 15 13 17]
[12  9 17 13  5  0 11 12  5  4]
[ 7 15  3  9  6  4 17  1  3  3]
[ 4  3 16 13 18 11 17 13 11 10]
[19  3 18 15  5 10 12  4 13  1]
[ 7  4  3 10 14 16 16 10 19  1]
[ 9  1  3  1 11  9 10 11  4  5]
[ 0  4 13 16  6 10 15 14 15 16]
[ 9  5 16 13  7 15 18  5 13  9]
[ 7  1 17 12 11  8 14 11 18  9]
[14 16 10  8  6  3  9  3  1 11]
[14  1 16 15  1  6  1 15 14  5]
[ 3 11  5 15  8 12  3 15  4 11]
[ 9  7 17 10  4 17 10  5  5 18]
[10  7 18 15  9  1  8 12 16  6]
[10 13  2 11  4  9  3  5  0  0]
[ 8 16 13  7  3 15 14 18 12  4]
[ 6  8 16 16 13 17  1 17 10  5]
[ 5  7  1 16  7 17  5  1 15  7]
[11  9 13  5 16 11 12 14  1 15]
[ 1  6  7 13 12  4 14  1 12  3]
[ 7  5 17 15  6 11  8  9  5 15]
[15  5  7 12 10 16  0  5  3 11]
[17 18  7  1  8  9  8  3  7 12]
[ 0  5 12 13  5 18 17 14 18  8]
[15 18 12  1 16 16  5  3  3  6]
[ 6  1 16 11  1 18  5 11  3 10]
[ 7 18  1  1  4  1  4 15 10  0]
[ 9  4  5  6  7  1  8 17 14 15]
[14  0 18  0  6  9 10  9  3 13]
[11  6  5 12  8  0  5 12  4 18]
[10  8  8 13 15  3  5 16 15  3]
[10  8  1  3 17 13  1  8  8  5]
[15 14  0 10 18 14 13 17  3  1]
[19 15  8 16 16  6  6 15  0  9]
[ 4  7 15 13 17 12  8  6  1  0]
[ 5  6  3  7 12 16 16 17 15 16]
[17  4  8 17 11  4 13 19  1  0]
[16  4 14  3  7 18  4  0 13 13]
[ 0  8  5  3  7  3  9 12 16  0]
[18  1  4 11  5  6  8 15 12 15]
[ 5  5  1 10  8  0 13  9  6  3]
[ 7 13  5  1  4 16  1  5  8  1]
[11  8 11  6 16  5 17  8  5 16]
[ 3 15 15  0  8 12 15 19 18 18]
[16  5  6 14 15  0  7  1 12  3]
[ 1  5  1 17  0  1  1 14  8  3]
[ 8  5  7 12 16  1  5  3 14  3]
[18 13 18  1  7  1  1 16  9  3]
[ 9  5 18 18 12  0  9  3 14 16]
[ 5  4 18 18 14 16 14 17 17  8]
[12 11 15 16 17  8 12 16  5 10]
[17  9 12 17 17 18  4  6 12  1]
[ 8  8 15  8 13 18  0  7  3  9]
[ 7 17 13 11  9 16 11  9  3 17]
[18  0 17 14 13  1 15 16  3  6]
[ 3  1  8  3  9  0  1 15 12 11]
[14 18 10  8 11 17 11  2  4  5]
[ 2  5 17 16 10 17  3 15  4  4]
[10 18 11 10 16  6 12  3  7  9]
[10 13  8 15 15 14 17  6 12  6]
[15  5 15 15  9  3  8 16 12 18]
[10 10 10  8  0  3 19 19 16  4]
[10  1  5  3 14  3 18 19  4 18]
[ 9  3 12 15  0 17  7  4  9 17]
[ 5 10  6  3 15 10 12 13  3 12]
[13  3 17 14 11 15  3  6 10 13]
[ 1  9 12  9 18 10  8 15  4  5]
[16  8  7  6 15  8  6  1 13 14]
[14  6  9 17  8  5  4 11  4 15]
[ 5 19 13 13  1  5 15 11  7 14]
[13  9  8 14  6 11 17  3  7  7]
[15  9  0 17 11  6  1  0  1  5]
[17  3  7  1  1 12  7  3 12  8]
[ 4 13 11  9  9 14 14 10 10 11]
[12  3  3 11  6 17 17 11 15 16]
[ 1 17  4 17 16  5  8  8 13 14]
[12  3  8  0 12 10 15  3 10  1]
[11 15  4  1 10  9  3  5 14 16]
[ 0 11 17 15  5 15  9  6  0 15]
[ 0  6  1  1 13 14 18 19 14 13]
[18  6 17 14 15 19  5 10  7 14]
[ 8  3 14 10  7 10 16 15 17 13]
[ 5  6 12 10 11 15 17 15  3  8]
[17  6  8  0  7  1 17 14  8  9]
[10  3 10 13 18  3 16 13  7  9]
[ 0  8 13 14  5 16  7 10 18  1]
[15 12  3  5  1 16  3  4 16 17]
[ 6  8  9 10  9 10 17 12  9  3]
[ 3 10  3  1 16 17 13  0  1  8]
[10  9  3 12  7  1  5  1 11 16]
[12 14  3  1 11  0  7 12  0  9]
[15 15  4  7 12 14  2  9 14 11]
[12 10  5 16  8 18  4  9 18  9]
[14  3 11 18 15  1  6 19  1 19]
[12  6  8 13 14 10  1  7 13 15]
[14 14 19  6  6 19  5  1  8 10]
[ 3  0  1  7  3  1 10  5 17 16]
[ 3  7  3 15  9 10  6  3 16  1]
[18  1  0 16 10 17  4 10 12 15]
[11 14  6  7 11  5 18 15 17 10]
[ 9 11  5  8  9  3  1 11  5 14]
[16 18 18  4  9 11 12  7  7  9]
[ 1  5 18  8 14  0 17  1  1  8]
[16  8 15 14  6  7 10 13 14  3]
[18 10  4 17  5  8 16 16 17  4]
[ 8  1  0 16 10 18 11 17 16  6]
[ 1  9  3 15  8 15 16  5  1  3]
[ 0 16  3 10  6  6 11  9  5 12]
[ 7  3 14  7  1  3  8  0  3 14]
[11  5 11 13 15  9  7  7 18 16]
[ 5  3 15 18  3  4 18  1 11 17]
[11 12  7 16 11 13 14  9  8 12]
[ 9  8  1  8 10  4 15 11  6  1]
[ 8  1  9  5 10  9  1 11 18  6]
[ 7 10  4 10  8 17 14  8 15 11]
[17 17  8 10 14 11 12  7  1 18]
[ 3  8 14  1  9  1  0  9  0  9]
[ 1 14 12 18 17  0 18 12  9  6]
[ 1  3 14  7  3  6 12  0 15 19]
[ 8 17  2  3  3  5  1  3  1 14]
[10  0  9  4 15  6  9 18 12  1]
[ 8  8 14  0  5 11 16  6 11 17]
[13 17  4 12  1  6  3  5 15  8]
[14  0  3  4  7 10  3 16 11 18]
[16 12 12  7 11 12  4 19  3 11]
[15  3 10 10 18 18  0 14 16  1]
[ 3  5  1 16  9 16 10  1 11  7]
[ 7 11  1  3  7  0  8  7  8 17]
[18 13 17 10 12 12  4 15  3  6]
[15 17  6 18  3  6  9 13  7  9]
[11 18  0 18 17  0 15  8  9 14]
[15 11 18  7 18 17 18 16  7 14]
[12  3 10 14 11 11 16  6 12 11]
[15  0  5 14 16  8 17  1  7 18]
[17 13 16 17  8 14 16  8  5 11]
[ 6  5  9 17 11 12 13  8 16  0]
[12  1 15  4  3 13  9 12  6  2]
[12 12  3 14  4 16  4 12 12  1]
[ 1 16  9  8  3 16 13  0 10  5]
[ 5  0 16 13  4 14  5 16  4 15]
[15 16  8 16 14  1 17 14  7  1]
[ 1  7 10 15  5  7  5  1  1 12]
[ 6  6  8 16 11  1  0 16 13 13]
[14  1 19  7  7 11 12  1  6 15]
[18  5  9 17 12 17 18 18  9  1]
[16  9 11 17 19 18  4  1  0 15]
[ 1  3 11 10 13 11  3  2 14  1]
[16  8  3  8 15  1 14 19  7  8]
[14  3 17 18 17 13  9  9 18  7]
[16 13  0 14  4  3 12  9 10 14]
[ 8  8 12  7  1 11  9 19 14 12]
[ 1  1 19 15  3  3  4 11 12 17]
[ 8  8  3 15  4 10 17  5  3  4]
[15 16 15 15  6  5 13  9  3  5]
[10  5 11 12  9  0  6  5 19  6]
[17  3 10 14  1 13 11  8  6 18]
[19 15  4  3 10  9  8 11 15  4]
[13  4  7 14 12 17  1  4 14 18]
[ 8 16 15  1  3 12  4 16 12 14]
[ 5  9  0 17  5  8 12 11  4  6]
[15  7 16 10  5  8  6  1 14  7]
[ 8 16  1  0  8 15  5 11 18  8]
[ 8  5  7 12  3  6 12  4  7  0]
[11 13  4  6 12 10 11  1  3 15]
[ 5  6  5 11  7  1  1 14  8 18]
[ 6  6  7 16 17 13  8 15 10  7]
[ 8  3  4  2  0  5  5 12 11  0]
[ 9 14  7  3  3 14 11 16 10 17]
[15 13 12 11  7  3 11 14  8 17]
[10 10 15  2 18 13 16  3 18  4]
[11  9  1  8  4 16 17 17 12 15]
[15  6 15  9  1 16 15 12 11  0]
[14 15  8  4 10  4 19  1 14 11]
[16  9 11  8 13  0 17  3 19  1]
[ 4  5  1  3 14 10 14  5  2  0]
[ 1  1 10 11 17 19 10  6 17 15]
[11  1 15  6  1 10  5 15  3 11]
[18 15  0 14 14  1  1 15 15 12]
[13  4  9  9  3  8 13 10  1  4]
[15  9  0  1  4 13  1  4  7  6]
[ 1  1  3  8  6 17  6 17  7 15]
[10  9  1  8  3 14 19 16  5  9]
[11 17 18 15 16  0 17  6 12  0]
[ 6 17  3 14 11 11 16  1 17  6]
[ 1  1 15 10 14  6  3 13 11 15]
[13  0  7 12 17  8 14  9  9 16]
[17  8 12 18 10 16 15 18  8  4]
[14 18  1  5 19  0  3  1 15 13]
[15  8 12 17  6  3  0  4 16  7]
[19 15 15 10 17  9 11 10 10 17]
[16  9 11  1  6 10  3  4  3 17]
[ 4 13  8  1 12 16 15  1 15 11]
[ 6  0 11  6 14  1 13 12  9 10]
[ 7  8 11 11  1  7  4  3  1  3]
[ 2  5  1 18 15 18  8 18 18 11]
[ 4 16 11 13 13  9  3 13 18 11]
[14 10 16  3  3 13  8 17  3  7]
[ 0 15 10 14  1 10  4  5 15 16]
[ 0 14 19 17 12 10  8 11  8 15]
[ 0  3 15 12  8  1  1 15  8 17]
[14  7 15 18  5  0  4  4 13  3]
[ 5  1  8 15  4  1 12 18 10 12]
[ 7 12  3  6  5  1  7  4 17 13]
[ 1  7 12  1  5  3  7  3 11  4]
[14 15 18 15  6  7  0 15 12 12]
[ 7  8 14  6  5 12  1 14  3  6]
[ 1 18 18 14  5 13 16 16  7 16]
[ 9  1 12  1 10 18 18  7  3 14]
[16  1  3 18 16  3 12 10 11 11]
[ 3 14 12  1 10  5  4  9  1 12]
[ 6 10 16  7 14 16 10 15 17  3]
[16  6 16 17  0  9 15  7 11 13]
[13  3 18  5 15 17  6 13  3 16]
[ 0 14 10  1  6 17  1  0  5  5]
[13 12  7  0  5 19 12 15 18  9]
[14  0 16  5  8  5  9 19 16  1]
[14 13  3  9  9  3 12  9  1 15]
[ 8  1 17 14  4 12  8 13  7 16]
[ 7  8  5  6 14 12  0 15 14  3]
[ 3  1 10  5  5 10  4  9 16  4]
[14 18  1  5 17  0 19 18 18  3]
[ 9 19 11 16  9 14 11  3 10 17]
[10 14 10 14  5  6 10 18  0  8]
[ 9 12 16 11  9  3  1  7 13  4]
[18 16 13  5 17  4 17  8  4  8]
[11  9 12  5 11  5 11  3 16 13]
[ 6  4 15 14  1 16 11  9 17  1]
[10 19  0  4 16  6  1 14  1  3]
[13 11  3  6  5  4 15 10  3 15]
[ 7  7  4  1 12 17 13  1  5 18]
[14  9  1 17 16 16  1  3  0 10]
[ 1 15  1  8 18 17  0  3 14  7]
[19  0 16  4 16  4 16 14 17 13]
[16 15 10  8 18 13 17 11  8  6]
[ 0 14 11 17  1 14  9 14 13  0]
[ 3 12  3 14 18 14 14 13  1  7]
[ 6 13 11 15  3 17  8 16 11 17]
[ 0  8 10  5 14 12 15 11 17 13]
[14 18  1 17  9 12  9  7 14  1]
[ 5 13 16 15 16 13  5  1 15  5]
[ 1  5 14  0 16 13  4 11  3 17]
[13 17 17 16 10  0  7 12 10  0]
[18 14 13 15  9  3 14  9  3  3]
[ 8 12 15  5 17  3 11 11 15 10]
[14 17 10 18 10  1 11 16  4  0]
[ 3 13 16 11 10 16  1 19  9  3]
[ 3  5 15  5 10  5  5 18  3  6]
[ 3 13  8 11  7  3 11 15  0 14]
[ 7 16  9  1  8 10  1  5  1  0]
[ 4 17 14  3 15  8  3  9 15 17]
[18 18 10 10 12 11  5 15  5 14]
[15  1 15  6  3 11  9 15  4 14]
[ 1 12 13 12  8 16 10  5 13 18]
[ 2 17  9 13  3  6 15 12  2 15]
[16  3  9 13  6  8 18  5 13 12]
[ 1  7 10 11  3 15 11 18  1  7]
[ 1  9  6 15 17 15  9  8 15  3]
[ 0 14  9  3 18  9 14 14 14 10]
[ 3  1 18  5 11 18 14  6  5 15]
[16 11 18  1 13 13  5 10 12 10]
[12  1 12  3  4 10  6  3  5 13]
[18 15 10  5 14 13  6  0 17  8]
[ 9 18  0  5 18 18  7 18  8  4]
[ 7 16  9 17 16  3  4 13  3  0]
[ 5  9  5  7 11 13  9 19  3  8]
[10  8 16  7  1  4  1  6 15  5]
[ 8  6  0 11  6  9  1  0  1  1]
[ 7  3 13  0 16 17 15  6  5  8]
[10  9 14  8  9  9 15 10  7  8]
[11 10 13  0  8  3 10 12 13  3]
[13 17 18  9  3  6  9  9  6  3]
[ 7  1  5  9  5  7 16 18 13 10]
[ 0  1 14 16 13 11 17  8 15 14]
[ 2 12 14  5  9  1  7 13  1  5]
[12  7 18 12 11  0  1  9  8 11]
[ 0 18  1  5  6 17 16 16  1  3]
[15  9  8 10 14  4 13 17  1  5]
[19  8  8  4  2 10 17 14  4 10]
[ 8 13  3 17 10 18  9 13 11 11]
[18  7  7 17 12 11  1 12 18 15]
[ 4 14 13 11 14  4 17 13 17  5]
[12 17 16  5 14  7  7 13 14 13]
[17 13  6  0 15 16 13 12  7  0]
[11 12  5 10  3 13  5 18  3  7]
[ 9  5 17  6  7 11  4  1 14  1]
[ 6 10  4  7  1  5 10  3 10 13]
[ 7 11 10 11 16  5  2 14 15  8]
[12  5 14 17 13  1  1 10  3  1]
[ 9  1 18 14  6  5 12  3  7 13]
[17  8 10 18  8 10 16  9  5  3]
[15  7  1  8 16 12 15  5  5 13]
[14  8  7  5  9  7 18 12  3 10]
[ 5  5  5  3  1  2  7 12  8  4]
[14 11 12 15 12  3  1 11 15  5]
[ 1 18 15  4 13  4  1 13 12  8]
[10  9 16 17  1  6 14  9 16 17]
[ 6  8  4  0 17 18  3 11 16  8]
[ 5  6  1 19  0 10  7 12  4 11]
[10 11  9 10  9  9  6  3  8 15]
[ 1  9  0 16  5  7 11  5  1 15]
[ 6 16 16  4  0  1  1 18 11  9]
[10 15 17  4 16 19  8 11  8  7]
[15 11 12 17 17 13 13  1 12 15]
[ 3 17 16  4 17  1  5 13 11 13]
[13  1 17 12 12  3 12 16 10 17]
[11  1 18  5 11  4  7  0  8 13]
[19  5  8  5  6  1 15  1 17 12]
[ 9  3  0  5  5  4  5  3 16 18]
[18  5 11 17 12  0  1  1 13 13]
[ 9 12 13  7 11  3  7  3  3  8]
[ 5 13 15 12 13 18  5  1  1 11]
[17  1  1 10 11  8  7  7  8  8]
[18  8 13  7  3  4  4 15  7  5]
[ 3 18  9  8  7 17 10  7  6 13]
[13 18 11 13  9 14  3  3 18  6]
[ 8 11  6  8 15  1 10  7 15 16]
[11  3  3 13  1  0  4 15 18 14]
[ 2  0  5  7  7 15 12  4 12  5]
[11 16 11  0  1 19 14 16  7  3]
[16 17 17 15  5 12  1  0 15  0]
[11 15  5  9 13  7 18  5  3 17]
[ 7 16  5 14 12 17 10 14 19 15]
[14 18 19 14 16  0 15 17 16  3]
[ 1 11 13  4  0 17  8 17  3  3]
[ 9 14  0 13 12  0 13  5  7  5]
[ 3 13  3  1 16 12 15 16 11 14]
[11  7 15  0  5  3 18 17  4  1]
[ 3 13 14  7  5 17 12 17 11 13]
[ 3 16  4  4  3 17  5 15 18 12]
[17 11  4  3 11 15 10 17 14  9]
[11  1 17  0 11 16  5  3 11 11]
[ 1  8 12 15 13  3 16 15  7 11]
[ 9 15  7  7 15  1 13 13  6 11]
[ 0 12 11 13  6  1  4  8  9  3]
[14  9 10  3 10  4  5  2  7 17]
[ 9  0  1 16  1 16 12  3  4  3]
[12  9 16 14 15 12  0 10 14  4]
[ 7  8 12  1  7  1 18  4 15 18]
[14 10  7 11  5  1  0 11 18  3]
[16 12 12  9  4 19 12 15  7 18]
[16 15  7  6 13 19  1 11 13 10]
[13 18 19  9  1 12  6 15  7 12]
[14  3 10  1 16  5 16 10  1  6]
[13  5 11 18 10 14 18 12 15  9]
[10 17 14  2  1  3  0 10 11  0]
[ 1 15  7 18 15 12  8  0 13 16]
[19  9 17  9 19  6  7 11  3  1]
[12 17 18  3  7  7  0 15 12 14]
[ 5 15 15 11  3 15  6  0 16  8]
[16 11  8 19 15  8  5 12  9  4]
[18  1  2 10  9 10 10 11 16 14]
[10  6 11  1 14 19 10  7  6  0]
[ 1  9 15 18  4 14 12 14 14  6]
[ 0 12 19  0 11  8  7  1 11 12]
[17  3 16 13 14 13  1 16  8 11]
[11 12  7  1 11  0 15 16  1  0]
[ 6  4  9 16 13 13 18  5 12 15]
[18  6 15 11  5 16 11 10  1 17]
[ 6  4  9  3  9  6 15 16 14 11]
[13 16  4 11  5  9 14 10 16 18]
[11  8  9 17  1  6  7  7  8  3]
[ 7  1  0  7 13  5 18 11 16  9]
[ 1  8 16 14  0  7  8 15  8 16]
[17  5 12 11  1 15 17  1 16 17]
[13  8  4  5 14  5 11  0 15 11]
[ 0 12 13 13  8  5 17  6  5 10]
[19 12 15  7  3  9  9  8 12 13]
[16 14  9 13  9  6 16 10 13  1]
[10  0  3  4  3 12 18  8  8 11]
[16 10  6  1  0 13  8  5 15  1]
[ 8  6 12 16  0  3 16  3 18  8]
[12  1  3  4 16 16  3 17 12  3]
[ 9  4 17 16  0  3  4 16  6 14]
[ 7 15 16 14 19  4 12  0 12  8]
[ 7  3 17 18 17 16  0  1 17  1]
[ 0 15 10  0  1  4 10  4 16  4]
[ 8 13  0 15  3 14 15 19  0  3]
[ 9  0  6 12 17 19 17  3  4 13]
[11  3 10  0  8 10  0 14  7  6]
[ 7 11  3  4  6 10 17 19  1 14]
[12 12  9 17  8 12 12 17  5 17]
[15  7 14  8  1 12  4  3  3  5]
[11 15 10  1  9 19 12  4  6  6]
[ 9  8 15  6  3 11  0  6  9  9]
[13  0  2 12 10 10  0 18  7 11]
[ 5  8 14 10 13  1 13 16  1 11]
[ 7  5 11  3  7  7  7 15 12  1]
[ 8 11  0  0  3 13 11 16 13 12]
[ 7 14 12 14  3 13  9 18  6  5]
[ 1 10  3  4 17  6 11 11 12 17]
[ 2 14 18  8  7 12 19 10  1 17]
[19  8  5 11  4  0 14  1 11 19]
[12 15 11  1  7  3  3  1  8 19]
[ 1  7  4 10 13 11  5 10 11  3]
[ 1  6 19  6  8 17  8 16 10  3]
[ 3  3 18  2 13  3  7 15  9  3]
[ 6 16  6 11 15 19 19 16 16 15]
[ 6 10  4 17  5  1 12 16  1  6]
[ 8 15 14  1  4  5  4 13 11 13]
[13  1  5  9 13  3  3 13  8  6]
[13  3  3 18 12  0 12  3  6 17]
[12 12  5  6 17 18 13 19  9 15]
[ 6 12  9 13 18  7 16 12  9  1]
[10  3  7  1  8  7 12 17  1 16]
[ 9 13 13 17  8 15 17  3 19 12]
[ 1 13  9  1  6 13 16 15 18  1]
[19 11]
-----------------------------
-----------------------------
------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------

------------------------------
------------------------------

0.8397707979626485
------------------------------
------------------------------

precision    recall  f1-score   support

alt.atheism       0.86      0.86      0.86       201
comp.graphics       0.59      0.86      0.70       250
comp.os.ms-windows.misc       0.89      0.10      0.17       248
comp.sys.ibm.pc.hardware       0.60      0.88      0.72       240
comp.sys.mac.hardware       0.93      0.78      0.85       242
comp.windows.x       0.82      0.84      0.83       263
misc.forsale       0.91      0.70      0.79       257
rec.autos       0.89      0.89      0.89       238
rec.motorcycles       0.98      0.92      0.95       276
rec.sport.baseball       0.98      0.91      0.95       251
rec.sport.hockey       0.93      0.99      0.96       233
sci.crypt       0.86      0.98      0.91       238
sci.electronics       0.85      0.88      0.86       249
sci.med       0.92      0.94      0.93       245
sci.space       0.89      0.96      0.92       221
soc.religion.christian       0.78      0.96      0.86       232
talk.politics.guns       0.88      0.96      0.92       251
talk.politics.mideast       0.90      0.98      0.94       231
talk.politics.misc       0.79      0.89      0.84       188
talk.religion.misc       0.93      0.44      0.60       158

avg / total       0.86      0.84      0.82      4712

Process finished with exit code 0



# --*-- coding:utf-8 --*--

def vector(vector_model_list, data_list):
    """Encode ``data_list`` as a binary presence vector over a feature list.

    Args:
        vector_model_list: Ordered sequence of features to look for.
        data_list: Container (for example a string or a list) that is
            tested with ``in`` for each feature.

    Returns:
        A list of 0/1 ints with one entry per feature, in feature order;
        entry ``i`` is 1 when ``vector_model_list[i]`` occurs in
        ``data_list`` and 0 otherwise.
    """
    # Test each position directly in a single pass. Looking the position up
    # with list.index() rescans the feature list for every feature and always
    # returns the first match, which leaves duplicated features stuck at 0.
    return [1 if item in data_list else 0 for item in vector_model_list]



1、先定义一个列表a，其长度与待识别的特征列表b相同，并全部以0填充；
2、将文本读入并转换为字符串s；
3、依次取出b中的每个特征元素T，判断T是否出现在s中；如果出现，则将a中与T在b中索引相同的那个元素设置为1；
4、输出a即为输入文本s的文本向量。

# --*-- coding:utf-8 --*--

def vector(vector_model_list, data_list):
    """Encode ``data_list`` as a binary presence vector over a feature list.

    Args:
        vector_model_list: Ordered sequence of features to look for.
        data_list: Container (for example a string or a list) that is
            tested with ``in`` for each feature.

    Returns:
        A list of 0/1 ints with one entry per feature, in feature order;
        entry ``i`` is 1 when ``vector_model_list[i]`` occurs in
        ``data_list`` and 0 otherwise.
    """
    # Test each position directly in a single pass. Looking the position up
    # with list.index() rescans the feature list for every feature and always
    # returns the first match, which leaves duplicated features stuck at 0.
    return [1 if item in data_list else 0 for item in vector_model_list]

# Feature characters to look for, in the order they appear in the output.
vector1 = list('sfhky')

# Sample text to be encoded.
sss = 'ftlykjzsdfkw'

# One 0/1 flag per feature: 1 when that character occurs in the sample text.
d = vector(vector1, sss)
print(d)

D:\anaconda\python.exe C:/Users/pc/Desktop/pypro/ee.py

[1, 1, 0, 1, 1]

Process finished with exit code 0


# Text-to-vector converter shared by the training and the test set.
vec = CountVectorizer()
# Fit the feature list on the training texts and convert them to vectors.
x_train = vec.fit_transform(X_train)
# Convert the test texts with the already-fitted feature list (no second fit),
# so the test vectors have the same dimensions as the training vectors.
x_test = vec.transform(X_test)


CountVectorizer是一个将文本转换为向量的类，fit_transform()和transform()两个方法都能完成转换，但前者是先拟合（fit）再转换，特征列表由它根据输入文本自行生成；而后者不会自己生成特征列表，在用它把文本转换为向量之前，必须先对训练集执行过fit()，否则会报错。在上面的代码中，我利用fit_transform()实现对X_train的转换，利用transform()实现对X_test的转换，是因为在生成x_test之前，vec已经在训练集上fit()过了；这里就不能再利用fit_transform()对X_test进行转换，否则两次fit()过程各自生成的特征列表不同，会造成训练向量和测试向量的维度不匹配，引起程序报错。