我在python中使用stopwords进行自动语言检测
但是我在尝试测试代码时遇到了KeyError。
这是密码import nltk
from nltk.corpus import stopwords
def scoreFunction(wholetext):
dictiolist={}
scorelist={}
NLTKlanguage = ["dutch","finnish","german","italian","portuguese","spanish","turkish","danish","english"," french","hungarian","norwegian","russian","swedish"]
FREElanguages = [""]
languages= NLTKlanguages + FREElanguages
for lang in NLTKlanguages:
dictiolist[lang]=stopwords.words(lang)
tokens=nltk.tokenize.word_tokenize(wholetext)
tokens=[t.lower() for t in tokens]
freq_dist=nltk.FreqDist(tokens)
for lang in languages:
scorelist[lang]=0
for word in freq_dist.keys()[0:20]:
if word in dictiolist[lang]:
scorelist[lang]+=1
return scorelist
def whichLanguage(scorelist):
maximum=0
for item in scorelist:
value = scorelist[item]
if maximum
maximum = value
lang = item
return lang
当我运行scoreFunction(“hillo我的名字是osfar,我是天才”)
我知道错误了
回溯(最近一次调用):文件“”,第1行,中
^{pr2}$