Source text document
First, import the packages the script depends on (the code is written in PyCharm):
import string

import jieba
import numpy as np

# Load the user dictionary so jieba keeps Dragon Ball names as single tokens
jieba.load_userdict('七龙珠.txt')
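jieba's user dictionary is a plain UTF-8 text file with one entry per line, in the format "word [frequency] [part-of-speech]", where frequency and part-of-speech are optional. The entries below only illustrate the format; they are not taken from the actual 七龙珠.txt:

孙悟空 5 nr
贝吉塔 3 nr
龟派气功 3 n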
# Build the stopword list: one stopword per line in the file
def stop_words():
    with open('七龙珠.txt', encoding='utf-8') as f:
        return [line.strip() for line in f]
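A quick way to confirm the list loads as expected is to print a few entries; this check is a hypothetical addition, not part of the original script:

print(stop_words()[:10])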
# Segment a sentence into Chinese words and remove stopwords
def seg_depart(sentence):
    # Cut each line of the document into words
    sentence_depart = jieba.cut(sentence.strip())
    # Build the stopword list
    stopwords = stop_words()
    # Accumulate the result in outstr
    outstr = ''
    # Drop stopwords and stray tab characters
    for word in sentence_depart:
        if word not in stopwords and word != '\t':
            outstr += word + ' '
    return outstr
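With these two helpers in place, the whole document can be segmented line by line. Below is a minimal driver sketch, assuming the source text is read from one file and the segmented result written to another; both file names are placeholders, not from the original:

# Sketch of a driver loop; input/output file names are assumed for illustration
with open('source.txt', encoding='utf-8') as inputs, \
        open('segmented.txt', 'w', encoding='utf-8') as outputs:
    for line in inputs:
        outputs.write(seg_depart(line) + '\n')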