# Simple spam e-mail classification with naive Bayes (用朴素贝叶斯进行简单的垃圾邮件分类)
import numpy as np
import re
import os
import random
import numpy as np
# 处理给定路径下的文件
def load_data(folder_path, num_per_class=25):
    """Load the spam and ham text files under *folder_path*.

    For each index ``i`` from 1 to *num_per_class*, ``spam/i.txt`` is read
    first, then ``ham/i.txt``. Each file's text is passed through
    ``doc2words_list`` (defined outside this block) and the result is
    appended to the document list.

    NOTE: this function changes the process working directory to
    *folder_path* and does not restore it. That behaviour is kept because
    code elsewhere may rely on it.

    Args:
        folder_path: Directory containing the ``spam`` and ``ham``
            sub-directories.
        num_per_class: Number of numbered files to read from each
            sub-directory. Defaults to 25.

    Returns:
        A ``(doc_list, label)`` tuple. ``doc_list`` holds one
        ``doc2words_list`` result per file; ``label`` holds the matching
        class at the same position: 1 for spam, 0 for ham.
    """
    os.chdir(folder_path)
    doc_list = []
    label = []
    for i in range(1, num_per_class + 1):
        # Spam before ham for every index, so the two classes are
        # interleaved in the output.
        for sub_dir, class_label in (('spam', 1), ('ham', 0)):
            file_name = '{0}/{1}.txt'.format(sub_dir, i)
            # Context manager closes the handle even if reading fails.
            with open(file_name) as text_file:
                text = text_file.read()
            doc_list.append(doc2words_list(text))
            label.append(class_label)
    return doc_list, label
# 将文件转换成单词列表
def