13

如何使用PEG.js为左关联运算符构建 AST(抽象语法树)?

我试图根据我在互联网上找到的信息编写一些代码,但我似乎犯了一个错误。

我编写的代码为大多数表达式生成了不正确的 AST。

表达

12-6-4-2*1-1

预期 AST

{
    "left": {
        "left": {
            "left": {
                "left": 12,
                "operator": "-",
                "right": 6
            },
            "operator": "-",
            "right": 4
        },
        "operator": "-",
        "right": {
            "left": 2,
            "operator": "*",
            "right": 1
        }
    },
    "operator": "-",
    "right": 1
}

生成的 AST

{
   "left": {
      "left": {
         "left": 12,
         "operator": "-",
         "right": 6
      },
      "operator": "-",
      "right": 4
   },
   "operator": "-",
   "right": {
      "left": 2,
      "operator": "*",
      "right": {
         "left": 1,
         "operator": "-",
         "right": 1
      }
   }
}

代码

{

    function operator(first, rest) {
        if (rest.length === 0) return first;

        return { left: first, right: rest };
    };

    function makeOperator(left, operator, right) {
        return { left: left, operator: operator[0], right: clean(right[1]) };
    };

    function clean(expression) {
        if (!expression.right) return expression;

        var result = makeOperator(expression.left, expression.right[0], expression.right[0]);

        for (var counter = 1, len = expression.right.length; counter < len; counter++) {
            result = makeOperator(result, expression.right[counter], expression.right[counter]);
        }

        return result;
    };

}

Start = E

E
  = expression:E1

    { return clean(expression); }

E1
  = expression:E2 rest:(("+" / "-") E2)*

    { return operator(expression, rest); }

E2
  = expression:Value rest:(("*" / "/") E1)*

    { return operator(expression, rest); }


Value
  = Number
  / BracketedExpression

Number
  = [1-9][0-9]*

    { return parseInt(text(), 10); }

BracketedExpression
  = "(" expression:E1 ")"

    { return expression; }

我非常感谢任何有关如何为左关联和右关联运算符构建 AST 的帮助或示例代码。

编辑:正如@Bergi 指出的那样,问题在于E2用作E1运算符列表其余部分的表达式而不是Value. 但是,Bergi 写的代码比我的要简单得多。

4

2 回答 2

16

右关联运算符编写起来相对简单,因为它们可以“本机”递归地解析:

E2
  = l:Value op:("*" / "/") r:E2
    { return {left:l, operator:op, right:r}; }
  / Value

// or equivalently (but more efficiently):

E2
  = l:Value r:(("*" / "/") E2)?
    { if (!r) return l;
      return {left:l, operator:r[0], right:r[1]}
    }

我们可以分别翻译左结合运算符的语法:

// [Do not use]
E1
  = l:E1 op:("-" / "+") r:E2
    { return {left2:l, operator:op, right2:r}; }
  / E2

但我们从中得到的只是一个错误Left recursion detected for rule "E1".确实,PEG 不具备左递归规则的能力,但 Wikipedia 告诉我们如何解决这个问题:我们需要将递归展开为*循环。您已经这样做了,但括号不同。它们应该与上面的递归定义相匹配,E2右边是单一的:

E1
  = ls:(E2 ("+" / "-"))* r:E2

这样我们就可以s使用递归辅助函数轻松地构建树:

    { return leftAssociative(ls, r); }

    function leftAssociative(ls, r) {
        if (!ls.length) return r;
        var last = ls.pop();
        return {left:leftAssociative(ls, last[0]), operator:last[1], right:r};
    }

或者,您可以使用循环,它与右侧的循环最匹配的包围:

E1
  = l:E2 rs:(("+" / "-") E2)*
    { var e = l;
      for (var i=0; i<rs.length; i++)
          e = {left:e, operator:rs[i][0], right:rs[i][1]};
      return e;
    }

作为参考,这里是完整的解析器:

{ 
    function leftAssoc(rest, val) {
        if (!rest.length) return val;
        var last = rest.pop();
        return {left:leftAssoc(rest, last[0]), operator:last[1], right:val};
    }
    function rightAssoc(val, rest) {
        if (!rest.length) return val;
        var first = rest.shift();
        return {left:val, operator:first[0], right:rightAssoc(first[1], rest)};
    }
}

Start = E1
 
E1 = rest:(E2 ("+" / "-"))* v:E2
     { return leftAssoc(rest, v); }

E2 = v:Value rest:(("*" / "/") Value)*
     { return rightAssoc(v, rest); }

Value = Number
      / BracketedExpression

Number = [1-9][0-9]*
         { return parseInt(text(), 10); }

BracketedExpression = "(" expression:E1 ")"
                      { return expression; }
于 2014-06-14T23:44:40.840 回答
1

这是一种在利用现代 Javascript 的同时实现这一目标的方法。

下面的示例使用Array.reduce箭头函数对象扩展语法来实现本质上是单行的。

Start 
  = head:E1 tail:(a:"->" b:E2 {return {operator:a, right:b}})* 
  {return tail.reduce((t,h) => ({...h, left:t}), head)}

// example expressions for operands ...
// (assuming non-commutative operator)
E1 = [pqr]
E2 = [xyz]

// input of: `p->y->z`
// results in output:
/*
```json
{
    "operator": "->",
    "right": "z",
    "left": {
      "operator": "->",
      "right": "y",
      "left": "p"
    }
}
```
*/
于 2018-08-28T05:36:04.137 回答