我正在使用停用词在 python 中进行自动语言检测
但是我在尝试测试代码时遇到了 KeyError 。这是代码
import nltk
from nltk.corpus import stopwords
def scoreFunction(wholetext):
dictiolist={}
scorelist={}
NLTKlanguage = ["dutch","finnish","german","italian","portuguese","spanish","turkish","danish","english"," french","hungarian","norwegian","russian","swedish"]
FREElanguages = [""]
languages= NLTKlanguages + FREElanguages
for lang in NLTKlanguages:
dictiolist[lang]=stopwords.words(lang)
tokens=nltk.tokenize.word_tokenize(wholetext)
tokens=[t.lower() for t in tokens]
freq_dist=nltk.FreqDist(tokens)
for lang in languages:
scorelist[lang]=0
for word in freq_dist.keys()[0:20]:
if word in dictiolist[lang]:
scorelist[lang]+=1
return scorelist
def whichLanguage(scorelist):
maximum=0
for item in scorelist:
value = scorelist[item]
if maximum<value:
maximum = value
lang = item
return lang
当我运行它 scoreFunction("hillo 我的名字是 osfar,我是天才") 我得到错误 Traceback (last recent call last): File "", line 1, in
scoreFunction("hello my name is osfar and i'm very genius")
File "C:/Users/osama1/Desktop
/fun-test", line 17, in scoreFunction
if word in dictiolist[lang]:
KeyError: ''