使用nltk去除停用词
# Remove English stop words from a sentence with NLTK.
# NOTE: the NLTK tokenizer models and the 'stopwords' corpus must already be
# available locally (they can be fetched with nltk.download).
import nltk
from nltk.corpus import stopwords

sent = "I am a student"

# Step 1: tokenize the sentence to obtain a list of words.
word_list = nltk.word_tokenize(sent)
print(word_list)

# Step 2: filter the stop words out of that list.
# Load the stop-word list once and keep it in a set, instead of re-reading it
# for every token inside the comprehension; membership tests become O(1).
english_stopwords = set(stopwords.words('english'))
# The comparison is an exact, case-sensitive string match, so a capitalised
# token such as 'I' is kept even though lowercase 'i' would be removed.
filtered_words = [word for word in word_list if word not in english_stopwords]
print(filtered_words)
结果:
['I', 'am', 'a', 'student']
['I', 'student']