0

计算与文本语料库 Gutenberg 关联的所有文件 ID 的单词覆盖率。实现这个功能的代码应该怎么写?

import nltk
from nltk.corpus import gutenberg
from decimal import Decimal

# For every file id in the Gutenberg corpus, print the ratio of raw text
# length to token count (rounded to 7 decimal places) followed by the id.
# The corpus object is spelled `gutenberg`, and the per-iteration variable is
# `fileid`; `fileids` is only the method that lists all ids.
for fileid in gutenberg.fileids():
    n_chars = len(gutenberg.raw(fileid))
    n_words = len(gutenberg.words(fileid))
    print(round(Decimal(n_chars / n_words), 7), fileid)
4

1 回答 1

0
import nltk

from nltk.corpus import gutenberg

# For each Gutenberg file id, print the ratio of total tokens to distinct
# tokens, followed by the file id.
for fileid in gutenberg.fileids():
    tokens = gutenberg.words(fileid)
    vocabulary = set(tokens)
    print(len(tokens) / len(vocabulary), fileid)
于 2020-02-09T11:29:55.070 回答