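# Prerequisite: a stop-word list file. You can download a ready-made list and use it as-is, or extend it with extra words that suit your own data.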
import jieba
def stopwordslist(filepath='D://停用词表.txt'):
    """Load the stop-word list from a text file, one entry per line.

    Args:
        filepath: Path of the stop-word file. Defaults to the location the
            original code hard-coded, so existing zero-argument calls keep
            working.

    Returns:
        A list with every line of the file, surrounding whitespace stripped,
        in file order. Blank lines are kept as empty strings.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 'utf-8-sig' reads plain UTF-8 unchanged but drops a leading BOM, which
    # would otherwise stay attached to the first stop word and stop it from
    # ever matching.
    # The with-block guarantees the handle is closed, even on a read error.
    with open(filepath, encoding='utf-8-sig') as stopwords_file:
        return [line.strip() for line in stopwords_file]
def seg_depart(sentence, stopwords=None):
    """Segment a Chinese sentence with jieba and drop stop words.

    Args:
        sentence: The text to segment; leading/trailing whitespace is
            stripped before segmentation.
        stopwords: Optional iterable of stop words. When omitted, the list is
            loaded with ``stopwordslist()``. Callers processing many
            sentences can load it once and pass it in to avoid re-reading
            the file on every call.

    Returns:
        A string in which every kept token is followed by a single space
        (so a non-empty result ends with a trailing space). Tokens that are
        stop words, or that are a lone tab character, are omitted.
    """
    if stopwords is None:
        stopwords = stopwordslist()
    # A set gives O(1) membership tests; the list form scanned the whole
    # stop-word list for every token.
    stopword_set = set(stopwords)

    kept_words = [
        word
        for word in jieba.cut(sentence.strip())
        if word not in stopword_set and word != '\t'
    ]
    # Same output format as before: each kept word followed by one space.
    return ''.join(word + ' ' for word in kept_words)