machine-learning - 无法为 Reber Grammar 设置 Pybrain LSTM 模块

Question

我正在尝试使用 Pybrain 来预测属于 Reber 语法的字符序列。

具体来说，我使用 Reber 语法图来生成字符串（该图见 http://www.felixgers.de/papers/phd.pdf 第 22 页）。此类字符串的一个示例是 BPVVE。我希望神经网络能够学习该语法的基本规则。对于每一个这样的字符串，我都会创建一个如下所示的序列：

             [B, T, S, X, P, V, E,]   ,           [B, T, S, X, P, V, E,]
B -> value = [1, 0, 0, 0, 0, 0, 0,]   ,  target = [0, 0, 0, 0, 1, 0, 0,]
P -> value = [0, 0, 0, 0, 1, 0, 0,]   ,  target = [0, 0, 0, 0, 0, 1, 0,]
V -> value = [0, 0, 0, 0, 0, 1, 0,]   ,  target = [0, 0, 0, 0, 0, 1, 0,]
V -> value = [0, 0, 0, 0, 0, 1, 0,]   ,  target = [0, 0, 0, 0, 0, 0, 1,]
E -> E is ignored for now because it marks the end

如您所见，该值只是表示当前字母的 7 维向量，目标是 Reber 单词中的下一个字母。

这是我要运行的代码：

#!/usr/bin/python

import reberGrammar as reber
import random as rnd

from pylab import *

from pybrain.supervised          import RPropMinusTrainer
from pybrain.supervised          import BackpropTrainer

from pybrain.datasets            import SequenceClassificationDataSet
from pybrain.structure.modules   import LSTMLayer, SoftmaxLayer
from pybrain.tools.validation    import testOnSequenceData
from pybrain.tools.shortcuts     import buildNetwork

def reberToListInt(word): #e.g. "BPVVE" -> [0,4,3,3,5]
    """Translate a Reber word into a list of integer letter codes.

    Codes are B=0, T=1, S=2, V=3, P=4, E=5; any other character
    (in practice 'X') is mapped to 6.
    """
    codes = {'B': 0, 'T': 1, 'S': 2, 'V': 3, 'P': 4, 'E': 5}
    # Characters missing from the table fall back to 6.
    return [codes.get(letter, 6) for letter in word]

def buildReberDataSet(numSample):
    """Generate a 7 class dataset of next-letter prediction samples.

    numSample -- number of distinct Reber words to generate.

    Each Reber word becomes one sequence in the dataset. For every letter
    except the last, one sample is added whose input is the one-hot vector
    of the current letter and whose target is the one-hot vector of the
    letter that follows it.

    Returns the populated SequenceClassificationDataSet.
    """

    reberLexicon = reber.ReberGrammarLexicon(numSample)

    DS = SequenceClassificationDataSet(7, 7, nb_classes=7)

    for rw in reberLexicon.lexicon:
        DS.newSequence()
        codes = reberToListInt(rw)
        # Pair every letter with its successor; the final letter has none.
        for current, following in zip(codes, codes[1:]):
            # Two separate lists are required: `inpt = outpt = [0.0]*7`
            # would bind both names to the same list, so input and target
            # would each end up with both letters set.
            inpt = [0.0] * 7
            outpt = [0.0] * 7
            inpt[current] = 1.0
            outpt[following] = 1.0
            DS.addSample(inpt, outpt)

    return DS

def printDataSet(DS, numLines): #debug helper: print a few statistics about DS
    """Print the sequence count, the dimensions and the first sequences of DS.

    DS       -- dataset providing getNumSequences(), indim, outdim and
                getSequenceIterator()
    numLines -- number of sequences (indices 0 .. numLines-1) to print
    """
    print "\t############"
    print "Number of sequences: ",DS.getNumSequences()
    print "Input and output dimensions: ", DS.indim,"\t", DS.outdim
    print "\n"
    for i in range(numLines):
        # Only the input vectors are printed; the targets are ignored.
        for inp, target in DS.getSequenceIterator(i):
            print inp,  # trailing comma: keep all inputs of a sequence on one line
        print "\n"
    print "\t#############"

'''Dataset creation / split into training and test sets'''

# Build a dataset from 700 distinct Reber words.
fullDS = buildReberDataSet(700)

# NOTE(review): presumably the first returned part holds the 0.25 share
# (test set) and the second the remainder (training set) -- confirm against
# the PyBrain documentation of splitWithProportion.
tstdata, trndata = fullDS.splitWithProportion( 0.25 )
trndata._convertToOneOfMany( bounds=[0.,1.])
tstdata._convertToOneOfMany( bounds=[0.,1.])

#printDataSet(trndata,2)

'''Network setup / training'''

# Recurrent network: one hidden LSTM layer of 7 units, softmax output layer.
rnn = buildNetwork( trndata.indim, 7, trndata.outdim, hiddenclass=LSTMLayer, outclass=SoftmaxLayer, outputbias=False, recurrent=True)
trainer = RPropMinusTrainer( rnn, dataset=trndata, verbose=True )
#trainer = BackpropTrainer( rnn, dataset=trndata, verbose=True, momentum=0.9, learningrate=0.5 ) 

# Error percentages recorded after each training round, used for the plot.
trainError=[]
testError =[]

#errors = trainer.trainUntilConvergence()


# 9 rounds of 2 epochs each (18 epochs in total), evaluating after every round.
for i in range(9):
    trainer.trainEpochs( 2 )
    # NOTE(review): assumes testOnSequenceData returns the fraction of
    # correctly classified samples, so 100*(1-x) is an error percentage.
    trainError.append(100. * (1.0-testOnSequenceData(rnn, trndata)))
    testError.append(100. * (1.0-testOnSequenceData(rnn, tstdata)))
    print "train error: %5.2f%%" % trainError[i], ",  test error: %5.2f%%" % testError[i]

# Draw both error curves in the same figure.
plot(trainError)
hold(True)
plot(testError)
show()

我没能成功训练这个网络：误差波动很大，始终没有真正收敛。非常希望能得到一些建议。

这是我用来生成 Reber 字符串的代码：

#!/usr/bin/python

import random as rnd

class ReberGrammarLexicon(object):
    """A set of distinct words generated by random walks on the Reber graph.

    Attributes:
        lexicon -- set of generated Reber words (one set per instance)
        maxSize -- maximum allowed length of a word, including 'B' and 'E'
    """

    # graph[state] lists the outgoing transitions of that state as
    # (next state, emitted letter) pairs. State 0 is the start state and
    # state 6 is the final state, which has no outgoing transitions.
    graph = [ [(1,'T'), (5,'P')], \
            [(1, 'S'), (2, 'X')], \
            [(3,'S') ,(5, 'X')],  \
            [(6, 'E')],           \
            [(3, 'V'),(2, 'P')],  \
            [(4, 'V'), (5, 'T')] ]  #store the graph

    _FINAL_STATE = 6

    def __init__(self, num, maxSize = 1000): #fill Lexicon with num words
        """Generate words until the lexicon holds num distinct ones.

        num     -- number of distinct words to collect
        maxSize -- maximum word length; must be at least 5

        Raises NameError if maxSize is smaller than 5.

        Note: the loop only stops once num distinct words were found, so
        num must not exceed the number of distinct words of length
        <= maxSize.
        """
        if maxSize < 5:
            raise NameError('maxSize too small, require maxSize > 4')

        self.maxSize = maxSize
        # Per-instance set: a class-level set() would be shared by (and
        # accumulate words across) every instance of the class.
        self.lexicon = set() #contain Reber words

        while len(self.lexicon) < num:
            word = self.generateWord()
            if word is not None:
                self.lexicon.add(word)

    def generateWord(self): #generate one word
        """Return one random Reber word, or None if it would exceed maxSize.

        The walk starts in state 0 with the letter 'B' and follows a
        uniformly chosen outgoing transition at each step until the final
        state is reached.
        """
        state = 0
        letters = ['B']

        # 'B' already uses one position, leaving maxSize - 1 transitions.
        for _ in range(self.maxSize - 1):
            state, letter = rnd.choice(self.graph[state])
            letters.append(letter)
            if state == self._FINAL_STATE:
                return ''.join(letters)

        # The final state was not reached within the allowed length.
        return None

谢谢，

最好的

machine-learning - 无法为 Reber Grammar 设置 Pybrain LSTM 模块

0 回答 0

Related

Reference