python - Python 2：AttributeError：“list”对象没有属性“split”

Question

这是我的 LSA 程序。在这个函数中，我想先对所有文本进行分词，然后把每个词转换为词干。我正在尝试把词干提取代码集成进来，结果得到了下面的错误：在 `for word in titles.split(" "):` 这一行抛出 `AttributeError: 'list' object has no attribute 'split'`

这是 LSA 的代码：

# -*- coding: utf-8 -*-

from numpy import zeros
from scipy.linalg import svd
from math import log
from numpy import asarray, sum
#from nltk.corpus import stopwords
from sklearn.metrics.pairwise import cosine_similarity
#from nltk.stem import PorterStemmer
#from nltk.stem.isri import ISRIStemmer
import nltk
#from matplotlib import pyplot as plt
from snowballstemmer import stemmer 


# Sample corpus: a Python list holding four Arabic sentences. Note that this is
# a list of strings, not a single string.
titles = [" ذهبت الاخت الى المدرسة","تقع المدرسة في الجبال",
    "ذهب الام لزيارة ابنتها في المدرسة ","تحضر الام الكعكة" ]

# Stemmer object obtained from snowballstemmer for the "arabic" algorithm.
ar_stemmer = stemmer("arabic")

# Words to be skipped; presumably passed to LSA(...) as its stopwords argument
# (the instantiation is not shown here).
stopwords = ['ثم','و','حتى','الى','على','في']

# Triple-quoted string containing the four characters , : ' !
# Presumably passed to LSA(...) as ignorechars; not used by the code shown here.
ignorechars = ''',:'!'''



# Builds a dictionary (wdict) keyed by stemmed word; see parse() below.
# NOTE(review): as pasted, the method definitions are not indented under the
# class header, so Python rejects this block as written. The class body
# presumably lost one level of indentation when the code was posted.
class LSA(object):
# Store the stop-word collection and the ignore-characters string, and start
# with an empty stem dictionary and a counter at zero.
def __init__(self, stopwords, ignorechars):
    self.stopwords = stopwords
    self.ignorechars = ignorechars
    # wdict maps a stem to the list of dcount values recorded for it in parse().
    self.wdict = {}
    self.dcount = 0    


# Stem words and record them in self.wdict, skipping stems found in
# self.stopwords.
# NOTE(review): the doc parameter is never read; the body uses the module-level
# name titles instead.
def parse(self, doc):

    # titles is a module-level list, and list objects have no split() method,
    # so this line raises AttributeError: 'list' object has no attribute 'split'.
    for word in titles.split(" "):
             stem = ar_stemmer.stemWord(word)

    # NOTE(review): as indented, this if/elif/else sits at the same level as the
    # for statement, so it would run once after the loop using the last stem.
    # It was presumably meant to be inside the loop body -- confirm.
    if stem in self.stopwords:
       pass
    elif stem in self.wdict:
            self.wdict[stem].append(self.dcount)
    else:
            # First time this stem is seen: start its list, then advance dcount.
            self.wdict[stem] = [self.dcount]
            self.dcount += 1

这就是我想要整合的：

# Standalone stemming example (Python 2 syntax: u"" literal, print statement).
from snowballstemmer import stemmer
ar_stemmer = stemmer("arabic")
# Here sentence is a single unicode string, so calling .split(" ") on it works.
sentence = u" ذهبت الاخت الى المدرسة, تقع المدرسة في الجبال"

for word in sentence.split(" "):
# NOTE(review): the two statements below are presumably the loop body; their
# indentation appears to have been lost when the snippet was posted.
stem = ar_stemmer.stemWord(word)
print stem

score 2 · Accepted Answer

titles 已经是一个列表，而不是字符串；请改成这样遍历：

# Outer loop walks the list; each element is a string, which does have split().
for sentence in titles:
    for word in sentence.split(" "):
        # Placeholder: the per-word processing goes here.
        ...

score 2 · Accepted Answer

列表对象不像字符串那样拥有 split 方法。如果要拆分 titles 列表中的每个字符串，可以嵌套一层循环，像下面这样：

# Suggested shape for parse(): iterate over the list, then split each string.
# NOTE(review): the visible part iterates the module-level titles list and does
# not read the doc parameter.
def parse(self, doc):

    for title in titles:
        # split() with no argument splits on runs of whitespace and drops empty
        # strings, unlike split(" ") used in the question.
        for word in title.split():
            stem = ar_stemmer.stemWord(word)

            if stem in self.stopwords:
                pass
    # Placeholder: the rest of the method is elided in the answer.
    ...

python - Python 2：AttributeError：“list”对象没有属性“split”

2 回答 2

Related

Reference