# Stemming(词干提取)可以抽取词的词干或词根形式。NLTK 中提供了三种最常用的词干提取器接口:
# Stemmer based on the Porter stemming algorithm.
from nltk.stem.porter import PorterStemmer

porter_stemmer = PorterStemmer()
# Plain ASCII quotes are required; typographic quotes are a SyntaxError.
porter_stemmer.stem('multiply')  # 'multipli'
# Stemmer based on the Lancaster stemming algorithm.
from nltk.stem.lancaster import LancasterStemmer

lancaster_stemmer = LancasterStemmer()
# Plain ASCII quotes are required; typographic quotes are a SyntaxError.
lancaster_stemmer.stem('multiply')  # 'multiply'
# Stemmer based on the Snowball stemming algorithm.
from nltk.stem import SnowballStemmer

# SnowballStemmer takes the language name as its constructor argument.
snowball_stemmer = SnowballStemmer("english")
# Plain ASCII quotes are required; typographic quotes are a SyntaxError.
snowball_stemmer.stem('multiply')  # 'multipli'