我可以毫无问题地从单个文件中输入测试数据。但是,每当我尝试从目录中的多个文件输入数据时,都会收到以下错误:AttributeError:'NoneType' 对象没有属性'lower'。请在下面查看我的代码,我将不胜感激。谢谢。
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from nltk.corpus import stopwords
import numpy as np
import numpy.linalg as LA
import os
path = "C:\zircon"
def radfil():
for file in os.listdir(path):
current = os.path.join(path, file)
if os.path.isfile(current):
data = open(current, "rb").read()
print data
train_set = [radfil()]
test_set = ["The sun in the sky is bright."]
stopWords = stopwords.words('english')
vectorizer = CountVectorizer(stop_words=stopWords, min_df=1)
#print vectorizer
transformer = TfidfTransformer()
#print transformer
trainVectorizerArray = vectorizer.fit_transform(train_set).toarray()
testVectorizerArray = vectorizer.transform(test_set).toarray()
print 'Fit Vectorizer to train set', trainVectorizerArray
print 'Transform Vectorizer to test set', testVectorizerArray