0

我有一个对我来说非常重要的问题。我有一个按行组织的文本文件,大小为 100 MB。文件中的几行如下:

yüceltmek;yücelt;tiğimizin
getirtmek;getirt;tiğimizin
kemikleşmek;kemikleş;tiğimizin
kronikleşmek;kronikleş;tiğimizin
şehirleşmek;şehirleş;tiğimizin
sakinleşmek;sakinleş;tiğimizin
gevşetmek;gevşetmek;tiğimizin
sanayileşmek;sanayileş;tiğimizin

第一个词是字典的键,其余的词是它的值。当我把文本加载到字典中后,程序占用的内存达到 1.8 GB。我的第一个问题是:怎样才能更高效地使用内存?第二个问题是:为什么文件在磁盘上的大小与它在内存中的占用相差这么大?

读取文本文件:

def LoadMorphemes(path="C:/Users/Ali/workspace/QTNGram/src/testmorphemes.txt"):
    """Load a ``root;stem;suffix`` morpheme file into a dictionary.

    Each line is lower-cased, every character outside the allowed alphabet
    (apostrophe, space, ``;``, Turkish/Latin lowercase letters and digits) is
    replaced by a space, and only the first whitespace-separated token of the
    result is examined.  That token is split on ``;``:

    * three non-empty fields ``a;b;c`` -> key ``b + c``, value
      ``DictMorphemes(a)``;
    * a single field ``a``             -> key ``a``, value ``DictMorphemes(a)``;
    * anything else is skipped.

    Args:
        path: Location of the UTF-8 encoded input file.  Defaults to the path
            that was previously hard-coded, so existing calls are unaffected.

    Returns:
        A ``collections.defaultdict(list)`` mapping each key to the list of
        ``DictMorphemes`` objects collected for it.  If an exception occurs
        while reading, it is printed (as before) and whatever was loaded up
        to that point is returned.
    """
    dicttKokMorphemes = collections.defaultdict(list)

    # Compiled once instead of on every line.  ``u"..."`` is used in place of
    # the Python-2-only ``ur"..."`` prefix so the file parses on Python 3 as
    # well; ``;`` needs no escaping inside a character class, so the pattern
    # matches exactly the same characters as before.
    disallowed = re.compile(
        u"[^' ;abcçdefgğhıijklmnoöpqrsştuüvwxyz0-9]", re.UNICODE)

    try:
        with codecs.open(path, mode="rb", encoding="utf-8",
                         errors="ignore") as testf:
            for line in testf:
                # The flag used to be passed as re.sub's 4th positional
                # argument, which is ``count``; that capped the number of
                # replacements per line at 32 (the value of re.UNICODE).
                tokens = disallowed.sub(" ", line.lower()).split()
                if not tokens:
                    continue

                fields = tokens[0].split(';')
                if len(fields) == 3:
                    if fields[0] and fields[1] and fields[2]:
                        key = fields[1] + fields[2]
                        dicttKokMorphemes[key].append(DictMorphemes(fields[0]))
                elif len(fields) == 1:
                    dicttKokMorphemes[fields[0]].append(
                        DictMorphemes(fields[0]))
    except Exception as ex:
        # Best-effort load: report the problem and keep what was read so far.
        print(ex)

    return dicttKokMorphemes

class DictMorphemes(object):
    """Small record holding a morpheme and two optional companion strings.

    Attributes are stored in ``__slots__`` rather than a per-instance
    ``__dict__``; this keeps each object small, which matters when one
    instance is created per input line.  As a consequence, only the three
    attributes listed below can be set on an instance.

    Note: because the first constructor parameter starts with two
    underscores, Python's private-name mangling applies to it inside the
    class body; pass it positionally.
    """

    __slots__ = ("Morpheme", "Negation", "Morphemes")

    def __init__(self, __morpheme_, _negatition_=None, _morphemes_=None):
        """Create a record.

        Args:
            __morpheme_: The morpheme; must be truthy (non-empty).
            _negatition_: Stored as ``Negation``; ``None`` becomes ``""``.
            _morphemes_: Stored as ``Morphemes``; ``None`` becomes ``""``.

        Raises:
            ValueError: If ``__morpheme_`` is empty or otherwise falsy.
        """
        if not __morpheme_:
            # The original bare ``raise`` had no active exception to re-raise
            # and therefore failed with an unrelated error; raise a
            # descriptive one that carries the offending value instead.
            raise ValueError(
                "morpheme must be non-empty, got %r" % (__morpheme_,))
        self.Morpheme = __morpheme_
        self.Negation = "" if _negatition_ is None else _negatition_
        self.Morphemes = "" if _morphemes_ is None else _morphemes_

    def getMorpheme(self):
        """Return the stored morpheme."""
        return self.Morpheme

    def getNegation(self):
        """Return the stored negation value."""
        return self.Negation

    def getMorphemes(self):
        """Return the stored morphemes value."""
        return self.Morphemes

    def setMorpheme(self, _morpheme_):
        """Replace the stored morpheme."""
        self.Morpheme = _morpheme_

    def setNegation(self, _negation_):
        """Replace the stored negation value."""
        self.Negation = _negation_

    def setMorphemes(self, _morphemes_):
        """Replace the stored morphemes value."""
        self.Morphemes = _morphemes_
4

0 回答 0