import nltk
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
import re
# import enchant
# from nltk.metrics import edit_distance
# Stemming: reduce words to their stems with the Porter stemmer.
stemmer = PorterStemmer()
print(stemmer.stem('cooking'))
print(stemmer.stem('cookery'))

# Lemmatization: reduce words to their lemmas with the WordNet lemmatizer.
# 'cooking' is lemmatized twice: once with no part of speech given, and once
# with pos='v' so that it is looked up as a verb.
lemmatizer = WordNetLemmatizer()
print(lemmatizer.lemmatize('cooking'))
print(lemmatizer.lemmatize('cooking', pos='v'))
print(lemmatizer.lemmatize('cookbooks'))

# Run the same word through both tools so their outputs can be compared.
# The results are printed explicitly: a bare expression statement in a script
# discards its value. The stemmer created above is reused, not rebuilt.
print(stemmer.stem('believes'))
print(lemmatizer.lemmatize('believes'))
# 基于匹配的正则表达式替换单词
replacement_patterns = [
(r'won\'t', 'will not'),
(r'can\'t', 'cannot'),
(r'i\'m', 'i am'),
(r'ain\'t', 'is not'),
(r'(\w+)\'ll', '\g<1> will'),
(r'(\w+)n\'t', '\g<1> not'),
(r&#
NLTK替换和矫正单词代码示例
最新推荐文章于 2020-12-09 21:38:03 发布