0

这是我的第一个问题:)

我想用 ANTLR 构建一个异构 AST 以获得简单的语法。有不同的接口来表示 AST 节点,例如 IInfiExp、IVariableDecl。ANTLR 提出了 CommonTree 来保存源代码的所有信息(行号、字符位置等),我想用它作为实现 AST 接口的基础 IInfixExp ...

为了获得一个 AST 作为输出,CommonTree 作为节点类型,我设置:

options {
  language     = Java;
  k            = 1;
  output       = AST;
  ASTLabelType = CommonTree;
}

IInifxExp 是:

package toylanguage;

public interface IInfixExp extends IExpression {
    public enum Operator {
        PLUS, MINUS, TIMES, DIVIDE;
    }

    public Operator getOperator();

    public IExpression getLeftHandSide();

    public IExpression getRightHandSide();
}

并且实现 InfixExp 是:

package toylanguage;

import org.antlr.runtime.Token;
import org.antlr.runtime.tree.CommonTree;

// IInitializable has only void initialize()
public class InfixExp extends CommonTree implements IInfixExp, IInitializable {
    private Operator operator;
    private IExpression leftHandSide;
    private IExpression rightHandSide;

    InfixExp(Token token) {
        super(token);
    }

    @Override
    public Operator getOperator() {
        return operator;
    }

    @Override
    public IExpression getLeftHandSide() {
        return leftHandSide;
    }

    @Override
    public IExpression getRightHandSide() {
        return rightHandSide;
    }

    // from IInitializable. get called from ToyTreeAdaptor.rulePostProcessing
    @Override
    public void initialize() {
        // term ((PLUS|MINUS) term)+
        // atom ((TIMES|DIIDE) atom)+

        // exact 2 children
        assert getChildCount() == 2;

        // left and right child are IExpressions
        assert getChild(0) instanceof IExpression
                && getChild(1) instanceof IExpression;

        // operator
        switch (token.getType()) {
        case ToyLanguageParser.PLUS:
            operator = Operator.PLUS;
            break;
        case ToyLanguageParser.MINUS:
            operator = Operator.MINUS;
            break;
        case ToyLanguageParser.TIMES:
            operator = Operator.TIMES;
            break;
        case ToyLanguageParser.DIVIDE:
            operator = Operator.DIVIDE;
            break;
        default:
            assert false;
        }

        // left and right operands
        leftHandSide = (IExpression) getChild(0);
        rightHandSide = (IExpression) getChild(1);
    }
}

相应的规则是:

exp // e.g. a+b
  : term ((PLUS<InfixExp>^|MINUS<InfixExp>^) term)*
  ;

term // e.g. a*b
  : atom ((TIMES<InfixExp>^|DIVIDE<InfixExp>^) atom)*
  ;

这很好用,因为 PLUS、MINUS 等是“真正的”令牌。

但现在来到想象中的令牌:

tokens {
  PROGRAM;
}

对应的规则是:

program // e.g. var a, b; a + b
  : varDecl* exp
    -> ^(PROGRAM<Program> varDecl* exp)
  ;

有了这个,ANTLR 不会创建以 PROGRAM 作为根节点的树。

在解析器中,以下代码创建 Program 实例:

root_1 = (CommonTree)adaptor.becomeRoot(new Program(PROGRAM), root_1);

与 InfixExp 不同,不是调用 Program(Token) 构造函数,而是调用 Program(int)。

程序是:

package toylanguage;

import java.util.Collections;
import java.util.LinkedList;
import java.util.List;

import org.antlr.runtime.Token;
import org.antlr.runtime.tree.CommonTree;

class Program extends CommonTree implements IProgram, IInitializable {
    private final LinkedList<IVariableDecl> variableDeclarations = new LinkedList<IVariableDecl>();
    private IExpression expression = null;

    Program(Token token) {
        super(token);
    }

    public Program(int tokeType) {
        // What to do?
        super();
    }

    @Override
    public List<IVariableDecl> getVariableDeclarations() {
        // don't allow to change the list
        return Collections.unmodifiableList(variableDeclarations);
    }

    @Override
    public IExpression getExpression() {
        return expression;
    }

    @Override
    public void initialize() {
        // program: varDecl* exp;

        // at least one child
        assert getChildCount() > 0;

        // the last one is a IExpression
        assert getChild(getChildCount() - 1) instanceof IExpression;

        // iterate over varDecl*
        int i = 0;
        while (getChild(i) instanceof IVariableDecl) {
            variableDeclarations.add((IVariableDecl) getChild(i));
            i++;
        }

        // exp
        expression = (IExpression) getChild(i);
    }
}

你可以看到构造函数:

    public Program(int tokeType) {
        // What to do?
        super();
    }

结果,使用 super() 构建了一个没有令牌的 CommonTree。所以 CommonTreeAdaptor.rulePostProcessing 看到的是一个平面列表,而不是一个以 Token 作为根的树。

我的 TreeAdaptor 看起来像:

package toylanguage;

import org.antlr.runtime.tree.CommonTreeAdaptor;

public class ToyTreeAdaptor extends CommonTreeAdaptor {
    public Object rulePostProcessing(Object root) {
        Object result = super.rulePostProcessing(root);

        // check if needs initialising
        if (result instanceof IInitializable) {
            IInitializable initializable = (IInitializable) result;
            initializable.initialize();
        }

        return result;
    };
}

并测试我使用的任何东西:

package toylanguage;

import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.TokenStream;
import org.antlr.runtime.tree.CommonTree;

import toylanguage.ToyLanguageParser.program_return;

public class Processor {
    public static void main(String[] args) {
        String input = "var a, b; a + b + 123"; // sample input

        ANTLRStringStream stream = new ANTLRStringStream(input);
        ToyLanguageLexer lexer = new ToyLanguageLexer(stream);
        TokenStream tokens = new CommonTokenStream(lexer);
        ToyLanguageParser parser = new ToyLanguageParser(tokens);
        ToyTreeAdaptor treeAdaptor = new ToyTreeAdaptor();
        parser.setTreeAdaptor(treeAdaptor);

        try {
            // test with: var a, b; a + b
            program_return program = parser.program();

            CommonTree root = program.tree;
            // prints 'a b (+ a b)'
            System.out.println(root.toStringTree());

            // get (+ a b), the third child of root
            CommonTree third = (CommonTree) root.getChild(2);

            // prints '(+ a b)'
            System.out.println(third.toStringTree());

            // prints 'true'
            System.out.println(third instanceof IInfixExp);

            // prints 'false'
            System.out.println(root instanceof IProgram);
        } catch (RecognitionException e) {
            e.printStackTrace();
        }
    }
}

为了完整起见,这里是完整的语法:

grammar ToyLanguage;

options {
  language     = Java;
  k            = 1;
  output       = AST;
  ASTLabelType = CommonTree;
}

tokens {
  PROGRAM;
}

@header {
  package toylanguage;
}

@lexer::header {
  package toylanguage;
}

program // e.g. var a, b; a + b
  : varDecl* exp
    -> ^(PROGRAM<Program> varDecl* exp)
  ;

varDecl // e.g. var a, b;
  : 'var'! ID<VariableDecl> (','! ID<VariableDecl>)* ';'!
  ;

exp // e.g. a+b
  : term ((PLUS<InfixExp>^|MINUS<InfixExp>^) term)*
  ;

term // e.g. a*b
  : atom ((TIMES<InfixExp>^|DIVIDE<InfixExp>^) atom)*
  ;

atom
  : INT<IntegerLiteralExp> // e.g. 123
  | ID<VariableExp>        // e.g. a
  | '(' exp ')' -> exp     // e.g. (a+b)
  ;

INT    : ('0'..'9')+ ;
ID     : ('a'..'z')+ ;
PLUS   : '+' ;
MINUS  : '-' ;
TIMES  : '*' ;
DIVIDE : '/' ;

WS : ('\t' | '\n' | '\r' | ' ')+ { $channel = HIDDEN; } ;

OK,最后一个问题是如何从

program // e.g. var a, b; a + b
  : varDecl* exp
    -> ^(PROGRAM<Program> varDecl* exp)
  ;

以 PROGRAM 为根的树

^(PROGRAM varDecl* exp)

而不是一个平面列表

(varDecl* exp) ?

(抱歉这么多代码片段)

Ciao顶点

4

1 回答 1

1

尝试创建以下构造函数:

public Program(int tokenType) {
    super(new CommonToken(tokenType, "PROGRAM"));
}
于 2011-07-21T05:48:17.863 回答