我正在使用 Python34。我想从 CSV 文件中获取单词的频率,但它显示错误。这是我的代码。任何人都可以帮我解决这个问题。
from textblob import TextBlob as tb
import math
words={}
def tfidf(word, blob, bloblist):
return tf(word, blob) * idf(word, bloblist)
def tf(word, blob):
return blob.words.count(word) / len(blob.words)
def n_containing(word, bloblist):
return sum(1 for blob in bloblist if word in blob)
def idf(word, bloblist):
return math.log(len(bloblist) / (1 + n_containing(words, bloblist)))
bloblist = open('afterstopwords.csv', 'r').read()
for i, blob in enumerate(bloblist):
print("Top words in document {}".format(i + 1))
scores = {word: tfidf(word, blob, bloblist) for word in blob.words}
sorted_words = sorted(scores.items(), key=lambda x: x[1], reverse=True)
for word, score in sorted_words[:3]:
print("\tWord: {}, TF-IDF: {}".format(word, round(score, 5)))
错误是:
Top words in document 1
Traceback (most recent call last):
File "D:\Python34\tfidf.py", line 45, in <module>
scores = {word: tfidf(word, blob, bloblist) for word in blob.words}
AttributeError: 'str' object has no attribute 'words'