grammar - 有没有更好的方法来指定 CFG 规则中的可选元素？

Question

假设要为某种语言设计并开发一个编译器。这种语言的语法中有一条特定的语句：(=<identifier>)。编译器可以识别这种写法，但括号、等号和标识符之间允许出现空格，因此会有以下几种可能的写法：

(=<identifier>)
( = <identifier> )
(=identifier )
( =identifier )
...

暂且不看完整的语法，只看处理这一语言特性的规则，我目前的写法如下（使用类似 Bison 的语法规则记法）：

statement: OBRCKT EQ ID CBRCKT
         | OBRCKT S EQ S ID S CBRCKT
         | OBRCKT S EQ ID S CBRCKT
         | OBRCKT S EQ S ID CBRCKT
         | OBRCKT S EQ ID CBRCKT
         | OBRCKT EQ S ID S CBRCKT
         | OBRCKT EQ ID S CBRCKT
         | OBRCKT EQ S ID CBRCKT
         | ...

空白终结符 S 可以出现，也可以不出现。但按照这种写法，我必须把所有可能的组合逐一列出……有没有更好的方法来达到同样的效果？

score 3 · Accepted Answer

正如 Jim 所说，应该用词法分析工具（lexer）来处理这些情况，而不是把它们写进语法的产生式里。

例如，我通常使用 Flex 进行词法分析，使用 Bison 来定义我的语法（可能就像你所做的那样）。

您可以通过以下方式实现您想要的结果（这只是一个示例，因此非常简单，不能做太多事情）：

lexicalAnalyzer.l

 /* lexicalAnalyzer.l
    Token specification for the "(=<identifier>)" statement example.
    Whitespace between tokens is consumed by the scanner, so the grammar
    never has to mention it.
 */
%{
/* Intentionally empty: the rule actions below only use printf and the
   T_* token codes. */
%}

/*
 *  Definitions of regular expressions
 *  Note: whitespace is captured here and thrown away by the {WSPACE}
 *  rule in the rules section. Keep comments off the definition line
 *  itself: flex takes everything up to the end of the line as part of
 *  the definition. Newline is included so that line breaks are skipped
 *  silently instead of reaching flex's default (echo) rule.
 */

WSPACE      [ \t\r\n]+

/*
 *  Tokens
 *  Every rule prints a trace line with the token name and the matched
 *  text (yytext), then returns the token code to the parser.
 */
%%

"="     {
            printf("TOKEN: EQ   LEXEME: %s\n", yytext);
            return T_EQ;
        }
"("     {
            printf("TOKEN: OBRCKT   LEXEME: %s\n", yytext);
            return T_OBRCKT;
        }
")"     {
            printf("TOKEN: CBRCKT   LEXEME: %s\n", yytext);
            return T_CBRCKT;
        }
"<"     {
            printf("TOKEN: LT   LEXEME: %s\n", yytext);
            return T_LT;
        }
">"     {
            printf("TOKEN: GT   LEXEME: %s\n", yytext);
            return T_GT;
        }
"identifier"    {
            /* This example only recognises the literal word "identifier". */
            printf("TOKEN: IDENT   LEXEME: %s\n", yytext);
            return T_IDENT;
        }
{WSPACE}    { /* discard whitespace: no token is returned to the parser */ }
.       {
            /* Any other single character is reported as an unknown token. */
            printf("TOKEN: UNKNOWN   LEXEME: %s\n", yytext);
            return T_UNKNOWN;
        }
%%

syntaxAnalyzer.y

/*
      syntaxAnalyzer.y

      To create syntax analyzer:
        flex file.l
        bison file.y
        g++ file.tab.c -o file_parser
        ./file_parser < inputFileName
 */

/*
 *  Declaration section.
 */

%{

    #include <stdio.h>
    void printRule(const char *lhs, const char *rhs);
    /*
     * Error-reporting hook with the yyerror signature that Bison-generated
     * parsers call when they detect a syntax error.
     *
     * s: message text supplied by the caller; this example does not print it.
     *
     * Prints "Error!" on standard output and returns 0. The explicit return
     * matters: the function is declared int, and falling off the end of a
     * non-void function is undefined behaviour when built as C++.
     */
    int yyerror(const char *s) {
        (void)s;
        printf("Error!");
        return 0;
    }
    /*
     * Declare the parser/scanner entry points with C linkage. The build
     * instructions at the top of this file compile the generated code with
     * g++, so these names would otherwise get C++ linkage.
     */
    extern "C" {
        int yyparse(void);
        int yylex(void);
        /* Always returns 1; by flex convention a non-zero result from
           yywrap means there is no further input after end-of-file. */
        int yywrap() {return 1;}
    }
%}

/*
 *  Token declarations
*/
%token  T_OBRCKT T_CBRCKT
%token  T_LT T_GT T_EQ   
%token  T_IDENT T_UNKNOWN

/*
 *  Starting point.
 */
%start  N_START

/*
 *  Translation rules.
 */
%%
/*
 * Start symbol: a complete input is exactly one statement.
 * Once the statement has been reduced, the rule trace and a completion
 * banner are printed and parsing stops successfully.
 */
N_START     : N_STATEMENT
            {
                printRule("START", "STATEMENT");
                printf("\n---- Completed parsing ----\n\n");
                /* Use YYACCEPT instead of a bare `return 0;` so that
                   yyparse() leaves through its own exit path while still
                   reporting success to the caller. */
                YYACCEPT;
            }
            ;

/*
 * The statement "(=<identifier>)". No whitespace tokens appear here
 * because the scanner discards whitespace before the parser sees it.
 */
N_STATEMENT : T_OBRCKT T_EQ T_LT T_IDENT T_GT T_CBRCKT
            {
                /* The trace uses "STATEMENT" as the left-hand side so it
                   agrees with the name printed by the N_START rule. */
                printRule("STATEMENT", "T_OBRCKT T_EQ T_LT T_IDENT T_GT T_CBRCKT");
            }
            ;
%%

#include "lex.yy.c"
extern FILE *yyin;

/*
 * Writes one grammar production to standard output in the form
 * "<lhs> -> <rhs>", terminated by a newline.
 *
 * lhs: text for the left-hand side of the production.
 * rhs: text for the right-hand side of the production.
 */
void printRule(const char *lhs, const char *rhs) {
  fprintf(stdout, "%s -> %s\n", lhs, rhs);
}

/*
 * Program entry point. Calls yyparse() at least once and keeps calling it
 * until the scanner's input stream (yyin) reports end-of-file. The result
 * of yyparse() is not inspected; the exit status is always 0.
 */
int main() {
  for (;;) {
    yyparse();
    if (feof(yyin)) {
      break;
    }
  }
  return 0;
}

grammar - 有没有更好的方法来指定 CFG 规则中的可选元素？

1 回答 1

Related

Reference