python - 用于解析的概率 CYK 算法不起作用

Question

我正在尝试在概率 CFG 上使用 CYK 算法构建解析树。这是我的概率 CFG

pcfg = PCFG.fromstring("""S -> NP VP [1.0]
                          NP -> DT NN [0.5]
                          NP -> NP PP [0.25]
                          NP -> 'John' [0.1]
                          NP -> 'I' [0.15]
                          PP -> P NP [1.0]
                          VP -> V [0.4]
                          VP -> Vt NP [0.4]
                          VP -> VP PP [0.2]
                          V -> 'sleeps' [0.5]
                          V -> 'laughs' [0.5]
                          Vt -> 'saw' [0.7]
                          Vt -> 'ate' [0.3]
                          P -> 'with' [0.7]
                          P -> 'under' [0.3]
                          DT -> 'the' [0.7]
                          DT -> 'a' [0.3]
                          NN -> 'man' [0.7]
                          NN -> 'woman' [0.2]
                          NN -> 'telescope' [0.1]
                        """)

这是 CYK 构建解析树的代码：

class PCFGParser():
    
    
    def __init__(self,grammar):
        
        #Initialize Nonterminals and the Unary and Binary rules
        
        self.unary_rules = []
        self.binary_rules =[]
        self.N =['S','NP','PP','VP','V','Vt','P','DT','NN']  # List of non-terminals from the grammar

        
        for rule in grammar.productions():
            
            
            if len(rule)==1:
                self.unary_rules.append(rule)
            elif len(rule)==2:
                self.binary_rules.append(rule)
    
    def q(self, X , Y, Z):
        
        #Returns probabilities for Binary rules
        
        print('binary_func_called')
        
       
        print("X : ",X,"TYPE X :",type(X))
        print("Y : ",Y,"TYPE Y :",type(Y))
        print("Z : ",Z,"TYPE Z :",type(Z))
        
        for rule in self.binary_rules:
            print('RULE LHS : ',rule.lhs(),'RULE LHS TYPE :',type(str(rule.lhs())))
            print('RULE RHS 0 : ',rule.rhs()[0],'RULE RHS 0 TYPE : ',type(str(rule.rhs()[0])))
            print('RULE RHS 1 : ',rule.rhs()[1],'RULE RHS 0 TYPE : ',type(str(rule.rhs()[1])))
            if str(rule.lhs())==X and str(rule.rhs()[0])==Y and str(rule.rhs()[1])==Z:
                print('working- returns prob')
                return rule.prob()
            else:
                print(' not satisfied')
            
        return 0

    def q_unary(self, X, W):
        
        #Returns probabilities for Unary rules
        
        for rule in self.unary_rules:
            if str(rule.lhs())==X and rule.rhs()[0]==W:
                return rule.prob()
        return 0
    
    
    def parse(self, sentence):
        
        #Calls the CYK Algorithm and stores the parse tree in a JSON Format
        
        sentence  = sentence.strip()
        print (json.dumps(self.CKY(sentence.split(' '))))
        
                
    def CKY(self, x):
        
        #Returns Tree for a grammar in Chomsky-Normal Form 
        
        n = len(x) # length of sentence x
        pi = defaultdict(float) # DP table pi
        bp = {} # back pointers

        # Base case
        for i in range(n):
            w = x[i] 
            for X in self.N:
                pi[i, i, X] = self.q_unary(X, w) 
        
        # Recursive case
        for l in range(1, n): 
            for i in range(n-l):
                j = i + l
                for X in self.N:
                    max_score = 0
                    args = None
                    for R in self.binary_rules: # search only within the rules with non-zero probability
                        
                        if str(R.lhs()) == X: # consider rules which start from X
                            Y,Z= R.rhs()
                            Y = str(Y)
                            Z = str(Z)
                            for s in range(i, j):
                                if pi[i, s, Y] and pi[s + 1, j, Z]: # calculate score if both pi entries have non-zero score
                                    score = self.q(X, Y, Z) * pi[i, s, Y] * pi[s + 1, j, Z]
                                    if max_score < score:
                                        max_score = score
                                        args = Y, Z, s
                    if max_score: # update table and back pointers
                        pi[i, j, X] = max_score
                        bp[i, j, X] = args

        # Backtrack to retrieve the tree
        if pi[0, n-1, 'S']:
            return self.backtrack_tree(x, bp, 0, n-1, 'S')
        else: # if start symbol is not 'S'
            max_score = 0
            args = None
            for X in self.N:
                print(pi[0, n-1, X])
                if max_score < pi[0, n-1, X]:
                    print('if - working')
                    max_score = pi[0, n-1, X]
                    args = 0, n-1, X
            return self.backtrack_tree(x, bp, *args)
        
        
        
     
    def backtrack_tree(self, sentence, bp, i, j, X):
       #Recurse to get the parse tree
        if i == j:
            return [X, sentence[i]]
        else:
            Y, Z, s = bp[i, j, X]
            return [X, self.backtrack_tree(sentence, bp, i, s, Y), 
                       self.backtrack_tree(sentence, bp, s+1, j, Z)]

parser = PCFGParser(pcfg)
parser.parse('the man sleeps')

但是，介于两者之间的概率计算不正确，导致以下输出以及错误：

binary_func_called
X :  NP TYPE X : <class 'str'>
Y :  DT TYPE Y : <class 'str'>
Z :  NN TYPE Z : <class 'str'>
RULE LHS :  S RULE LHS TYPE : <class 'str'>
RULE RHS 0 :  NP RULE RHS 0 TYPE :  <class 'str'>
RULE RHS 1 :  VP RULE RHS 0 TYPE :  <class 'str'>
 not satisfied
RULE LHS :  NP RULE LHS TYPE : <class 'str'>
RULE RHS 0 :  DT RULE RHS 0 TYPE :  <class 'str'>
RULE RHS 1 :  NN RULE RHS 0 TYPE :  <class 'str'>
working- returns prob
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-16-b0f35f4387c1> in <module>
----> 1 parser.parse('the man sleeps')

<ipython-input-11-33bb4cfed9b6> in parse(self, sentence)
     57 
     58         sentence  = sentence.strip()
---> 59         print (json.dumps(self.CKY(sentence.split(' '))))
     60 
     61 

<ipython-input-11-33bb4cfed9b6> in CKY(self, x)
    109                     max_score = pi[0, n-1, X]
    110                     args = 0, n-1, X
--> 111             return self.backtrack_tree(x, bp, *args)
    112 
    113 

TypeError: backtrack_tree() argument after * must be an iterable, not NoneType

我遵循的算法：

有人可以帮我完成这项工作吗？我已经正确地遵循了上述算法，为什么它会失败？我已经尽力了，但仍然无法弄清楚如何解决这个问题。递归似乎存在问题，因为它返回 S -> NP VP 的 0 概率。

python - 用于解析的概率 CYK 算法不起作用

0 回答 0

Related

Reference