1

我目前正在尝试使用 Antlr 和 Ecpise 为 Oberon 语言构建一个解析器。

这是我到目前为止所得到的:

grammar oberon;

options
{
    language = Java;
    //backtrack = true;
    output = AST;
}

@parser::header {package dhbw.Oberon;}
@lexer::header {package dhbw.Oberon; }
T_ARRAY : 'ARRAY' ;
T_BEGIN : 'BEGIN';
T_CASE : 'CASE' ;
T_CONST : 'CONST' ;
T_DO : 'DO' ;
T_ELSE : 'ELSE' ;
T_ELSIF : 'ELSIF' ;
T_END : 'END' ;
T_EXIT : 'EXIT' ;
T_IF : 'IF' ;
T_IMPORT : 'IMPORT' ;
T_LOOP : 'LOOP' ;
T_MODULE : 'MODULE' ;
T_NIL : 'NIL' ;
T_OF : 'OF' ;
T_POINTER : 'POINTER' ;
T_PROCEDURE : 'PROCEDURE' ;
T_RECORD : 'RECORD' ;
T_REPEAT : 'REPEAT' ;
T_RETURN : 'RETURN';
T_THEN : 'THEN' ;
T_TO : 'TO' ;
T_TYPE : 'TYPE' ;
T_UNTIL : 'UNTIL' ;
T_VAR : 'VAR' ;
T_WHILE : 'WHILE' ;
T_WITH : 'WITH' ;

module : T_MODULE ID SEMI importlist? declarationsequence?
    (T_BEGIN statementsequence)? T_END ID PERIOD ;

importlist : T_IMPORT importitem (COMMA importitem)* SEMI ;

importitem : ID (ASSIGN ID)? ;

declarationsequence :
        ( T_CONST (constantdeclaration SEMI)*
        | T_TYPE (typedeclaration SEMI)*
        | T_VAR (variabledeclaration SEMI)*)
        (proceduredeclaration SEMI | forwarddeclaration SEMI)*
        ;

constantdeclaration: identifierdef EQUAL expression ;

identifierdef: ID MULT? ;

expression: simpleexpression (relation simpleexpression)? ;

simpleexpression : (PLUS|MINUS)? term (addoperator term)* ;

term: factor (muloperator factor)* ;

factor: number
        | stringliteral
        | T_NIL
        | set
        | designator '(' explist? ')'
        ;

number: INT | HEX ; // TODO add real

stringliteral : '"' ( ~('\\'|'"') )* '"' ;

set: '{' elementlist? '}' ;

elementlist: element (COMMA element)* ;

element: expression (RANGESEP expression)? ;

designator: qualidentifier
    ('.' ID
        | '[' explist ']'
        | '(' qualidentifier ')'
        | UPCHAR )+
        ;

explist: expression (COMMA expression)* ;

actualparameters: '(' explist? ')' ;

muloperator: MULT | DIV | MOD | ET ;

addoperator: PLUS | MINUS | OR ;

relation: EQUAL ; // TODO

typedeclaration: ID EQUAL type ;

type: qualidentifier
        | arraytype
        | recordtype
        | pointertype
        | proceduretype
        ;

qualidentifier: (ID '.')* ID ;

arraytype: T_ARRAY expression (',' expression) T_OF type;

recordtype: T_RECORD ('(' qualidentifier ')')? fieldlistsequence T_END ;

fieldlistsequence: fieldlist (SEMI fieldlist) ;

fieldlist: (identifierlist COLON type)? ;

identifierlist: identifierdef (COMMA identifierdef)* ;

pointertype: T_POINTER T_TO type ;

proceduretype: T_PROCEDURE formalparameters? ;

variabledeclaration: identifierlist COLON type ;

proceduredeclaration: procedureheading SEMI procedurebody ID ;

procedureheading: T_PROCEDURE MULT? identifierdef formalparameters? ;

formalparameters: '(' params? ')' (COLON qualidentifier)? ;

params: fpsection (SEMI fpsection)* ;

fpsection: T_VAR? idlist COLON formaltype ;

idlist: ID (COMMA ID)* ;

formaltype: (T_ARRAY T_OF)* (qualidentifier | proceduretype);

procedurebody: declarationsequence (T_BEGIN statementsequence)? T_END ;

forwarddeclaration: T_PROCEDURE UPCHAR? ID MULT? formalparameters? ;

statementsequence: statement (SEMI statement)* ;

statement : assignment
        | procedurecall
        | ifstatement
        | casestatement
        | whilestatement
        | repeatstatement
        | loopstatement
        | withstatement 
        | T_EXIT
        | T_RETURN expression?
        ;

assignment: designator ASSIGN expression ;

procedurecall: designator actualparameters? ;

ifstatement: T_IF expression T_THEN statementsequence
        (T_ELSIF expression T_THEN statementsequence)*
        (T_ELSE statementsequence)? T_END ;

casestatement: T_CASE expression T_OF caseitem ('|' caseitem)*
    (T_ELSE statementsequence)? T_END ;

caseitem: caselabellist COLON statementsequence ;

caselabellist: caselabels (COMMA caselabels)* ;

caselabels: expression (RANGESEP expression)? ;

whilestatement: T_WHILE expression T_DO statementsequence T_END ;

repeatstatement: T_REPEAT statementsequence T_UNTIL expression ;

loopstatement: T_LOOP statementsequence T_END ;

withstatement: T_WITH qualidentifier COLON qualidentifier T_DO statementsequence T_END ;

ID : ('a'..'z'|'A'..'Z')('a'..'z'|'A'..'Z'|'_'|'0'..'9')* ;
fragment DIGIT : '0'..'9' ;

INT   : ('-')?DIGIT+ ;

fragment HEXDIGIT : '0'..'9'|'A'..'F' ;

HEX : HEXDIGIT+ 'H' ;

ASSIGN : ':=' ;
COLON : ':' ;
COMMA : ',' ;
DIV : '/' ;
EQUAL : '=' ;
ET : '&' ;
MINUS : '-' ;
MOD : '%' ;
MULT : '*' ;
OR : '|' ;
PERIOD : '.' ;
PLUS : '+' ;
RANGESEP : '..' ;
SEMI : ';' ;
UPCHAR : '^' ;

WS : ( ' ' | '\t' | '\r' | '\n'){skip();};

我的问题是,当我检查语法时,我收到以下错误,只是找不到合适的方法来解决这个问题:

rule statement has non-LL(*) decision 
due to recursive rule invocations reachable from alts 1,2.  
Resolve by left-factoring or using syntactic predicates 
or using backtrack=true option. 
|---> statement : assignment

我也有声明序列和简单表达式的问题。

当我使用options { … backtrack = true; … }它时至少可以编译,但是当我运行测试文件时显然不再正常工作,但是我找不到自己解决左递归的方法(或者我可能只是太盲目了此刻,因为我已经看这个太久了)。有什么想法可以更改发生错误的行以使其正常工作吗?

编辑 我可以修复三个错误之一。statement现在工作。问题是,assignment两者procedurecall都以designator.

statement : procedureassignmentcall
        | ifstatement
        | casestatement
        | whilestatement
        | repeatstatement
        | loopstatement
        | withstatement 
        | T_EXIT
        | T_RETURN expression?
        ;

procedureassignmentcall : (designator ASSIGN)=> assignment | procedurecall; 

assignment: designator ASSIGN expression ;

procedurecall: designator actualparameters? ;
4

0 回答 0