0
    %option lex-compat
    %option noyywrap
    %option yylineno
    %{
    #include<stdio.h>
    #include<conio.h>
    #include<stdlib.h>
    #include<string.h>
    %}
    %{
    int INDENT=0,DEDENT=0,line=0;
    %}
    HASH    "#"
    NEWL    [\n]
    SPACE   " "
    MULTS   """
    COMP    "e^"
    LETTER  ([a-zA-Z])
    HEXL    ([a-fA-F1-9])
    ZERO    "0"
    EXP     "10^"
    COLON   ":"
    DOT     "."
    LPAREN  "("
    RPAREN  ")"
    PLUS    "+"
    MINUS   "-"
    SIGN    ({PLUS}|{MINUS})
    MULT    "*"
    DIV     "/"
    ASSIGN  "="
    EQUAL   "=="
    MORE_THAN ">"
    LESS_THAN "<"
    OR      "or"
    AND     "and"
    NOT     "not"
    IF      "if"
    ELSE    "else"
    LAMBDA  "lambda"
    FOR     "for"
    PASS    "pass"
    H       ({"h"|"H"})
    WHITESPACE  {[\t\n]}
    NUM_NZ      [1-9]
    NUM         [0-9]
    INTEGER     ({SIGN}?({ZERO}|({ZERO}+/({NUM_NZ}+))))
    REAL        ({INTEGER}{DOT}{NUM}|{INTEGER}{DOT}{NUM}{EXP}{INTEGER})
    HEX         ({PLUS}?(ZERO}|{HEXL}+{H}))
    COMPLEX     ({PLUS}?{REAL}+{COMP}{SIGN}?(({REAL}/"i")|"i"))
    ID          ("'"{.}+/{LETTER}+"'")
    STRING      ("'"{.}+"'")
    MULTISTRING ({MULTS}({.}+)|{NEWL}+{MULTS})
    COMMENT     ({.}+/{HASH})
    %%
   {SPACE}      {INDENT++; };
   {NEWL}   
    {
     if(INDENT>DEDENT) printf("Line %d: Found token %s (lexeme:       '%s').\n",yylineno,"INDENT",yytext);  
     if(INDENT<DEDENT)  printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"DEDENT",yytext);   
     DEDENT=INDENT; line++;};
    {INTEGER}       printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"INTEGER",yytext);
   {REAL}           printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"REAL",yytext);
    {HEX}           printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"HEX",yytext);
    {COMPLEX}       printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"COMPLEX",yytext);
    {STRING}        printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"STRING",yytext);
    {MULTISTRING}   printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"MULTISTRING",yytext);
    {IF}            printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"IF",yytext);
    {ELSE}          printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"ELSE",yytext);
    LAMBDA}     printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"LAMBDA",yytext);
    {FOR}           printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"FOR",yytext);
    {PASS}          printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"PASS",yytext);
    {ID}            printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"ID",yytext);
    {COLON}         printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"COLON",yytext);
    {LPAREN}        printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"LPAREN",yytext);
    {RAREN}         printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"RPAREN",yytext);
    {ARITH_OP}      printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"ARITH_OP",yytext);
    {REL_OP}        printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"REL_OP",yytext);
    {LOGIC_OP}      printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"LOGIC_OP",yytext);
    {NOT}           printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"NOT",yytext);
    {ASSIGN}        printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"ASSIGN",yytext);
    .       printf("Line %d: Invalid token  ('%s').\n",yylineno,yytext);
    %%
    int main(int argc,char* argv[])
    {
yylex();
    }

flex 报告第 102 行有一条无法识别的规则,而那一行正是 yylex(); 所在的位置。我哪里做错了?错误究竟在哪里?我自己找不出来。

hw1.text , line 102: unrecognized rule

这是我尝试编译时得到的错误。我运行的命令是 flex hw1.text,该文件与 flex 程序位于同一个文件夹中。


(以下内容根据一条已删除的“非答案”帖子中的评论补充)

我做了你让我做的事,出于某种原因,我收到了“printf”行的错误。

我已经添加 :

ARITH_OP   ({PLUS}|{MINUS}|{DIV}|{MULT})
REL_OP     ({MORE_THAN}|{LESS_THAN}|{EQUAL})
LOGIC_OP   ({AND}|{OR})

可能是什么问题?

4

1 回答 1

3

我看到了很多问题:

  1. {NEWL} 之后的动作应该与 {NEWL} 写在同一行。

    {NEWL}
    {
    if(INDENT>DEDENT) printf("Line %d: Found token %s (lexeme:       '%s').\n",yylineno,"INDENT",yytext);
    if(INDENT<DEDENT)  printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"DEDENT",yytext);
    DEDENT=INDENT; line++;};
    

    这应该是这样的:

    {NEWL} {
              if(INDENT>DEDENT) printf("Line %d: Found token %s (lexeme:       '%s').\n",yylineno,"INDENT",yytext);
              if(INDENT<DEDENT)  printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"DEDENT",yytext);
              DEDENT=INDENT; line++;
           };
    
  2. 另一个问题是规则 {LAMBDA} 缺少开头的 {:

    {ELSE}          printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"ELSE",yytext);
    LAMBDA}     printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"LAMBDA",yytext);
    {FOR}           printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"FOR",yytext);
    
  3. 您使用{RAREN}而不是{RPAREN}.

  4. 您尚未定义{ARITH_OP}{REL_OP}{LOGIC_OP}

  5. 我认为 MULTS 的规则应该是这样的(但我可能错了):

    MULTS "\""
    
  6. 为了获得干净的编译,我将以下规则移动到扫描仪规则部分之后的注释中:

    {MULTISTRING}   printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"MULTISTRING",yytext);
    {ID}            printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"ID",yytext);
    {STRING}        printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"STRING",yytext);
    {HEX}           printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"HEX",yytext);
    {COMPLEX}       printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"COMPLEX",yytext);
    {INTEGER}       printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"INTEGER",yytext);
    {REAL}          printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"REAL",yytext);
    {ARITH_OP}      printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"ARITH_OP",yytext);
    {REL_OP}        printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"REL_OP",yytext);
    {LOGIC_OP}      printf("Line %d: Found token %s (lexeme: '%s').\n",yylineno,"LOGIC_OP",yytext);
    

显然,您还需要一个函数来代替重复的printf()语句。


“工作”代码

此代码编译并运行。规则仍然存在很多问题(例如,实数只能在小数点后有一位数,标识符必须用单引号括起来)。它使用一个print_token()函数来使扫描仪动作可读。

%option lex-compat
%option noyywrap
%option yylineno

%{
#include <stdio.h>
/*#include <conio.h>*/
#include <stdlib.h>
#include <string.h>
%}

%{
/*
 * Scanner state used by the {SPACE} and {NEWL} actions:
 *   INDENT - incremented every time the {SPACE} rule matches one space.
 *   DEDENT - copy of INDENT recorded by the most recent {NEWL} action.
 *   line   - incremented by every {NEWL} action; not read anywhere else
 *            in this file.
 */
int INDENT=0, DEDENT=0, line=0;
/* Prints one "Found token" report line; defined in the user-code section. */
static void print_token(const char *token);

/*
Original definitions that were rewritten, kept here for reference:

MULTISTRING ({MULTS}({.}+)|{NEWL}+{MULTS})
STRING      ("'"{.}+"'")
INTEGER     ({SIGN}?({ZERO}|({ZERO}+/({NUM_NZ}+))))
ID          ("'"{.}+/{LETTER}+"'")
H           ({"h"|"H"})
COMPLEX     ({PLUS}?{REAL}+{COMP}{SIGN}?(({REAL}/"i")|"i"))
*/

/*
Examples of text accepted by the current pattern definitions:

INTEGER: 1 +1 -2 99 999
         (digits 1-9 only; a lone 0 is accepted by HEX, not INTEGER, and a
         number containing 0 is not matched as a single INTEGER)
REAL:    1.2 -2.1 1.510^3
         (exactly one digit after the dot, so 3.14159 matches only as 3.1)
ID:      'ThisIsAnId'
         (letters only between single quotes; the same text also satisfies
         STRING)
HEX:     0 +0 DeadBeefH
*/
%}

/*
 * Named pattern definitions.  Flex expands {NAME} textually wherever a later
 * definition or a rule refers to it, so a definition that is never referenced
 * is never checked.
 */

/* Fixed-text and single-character building blocks. */
HASH        "#"
NEWL        [\n]
SPACE       " "
MULTS       "\""
COMP        "e^"
LETTER      ([a-zA-Z])
/*
 * NOTE(review): HEXL has no 0, so HEX cannot match a hex literal containing a
 * zero digit (for example A0H) as one token.  Confirm whether [a-fA-F0-9] was
 * intended.
 */
HEXL        ([a-fA-F1-9])
ZERO        "0"
EXP         "10^"
COLON       ":"
DOT         "."
LPAREN      "("
RPAREN      ")"
PLUS        "+"
MINUS       "-"
SIGN        ({PLUS}|{MINUS})
MULT        "*"
DIV         "/"
ASSIGN      "="
EQUAL       "=="
MORE_THAN   ">"
LESS_THAN   "<"

/* Keywords. */
OR          "or"
AND         "and"
NOT         "not"
IF          "if"
ELSE        "else"
LAMBDA      "lambda"
FOR         "for"
PASS        "pass"

H           ("h"|"H")
/*
 * WHITESPACE is not referenced by any rule.  The stray braces that used to
 * surround the character class have been removed so the definition is a valid
 * pattern if it is ever used.
 */
WHITESPACE  [\t\n]
NUM_NZ      [1-9]
NUM         [0-9]

/* Numeric literals.  INTEGER uses digits 1-9 only. */
INTEGER     ({SIGN}?{NUM_NZ}+)

/* REAL accepts exactly one digit after the dot, optionally followed by 10^INTEGER. */
REAL        ({INTEGER}{DOT}{NUM}|{INTEGER}{DOT}{NUM}{EXP}{INTEGER})
HEX         ({PLUS}?({ZERO}|{HEXL}+{H}))
COMPLEX     ({PLUS}?{REAL}+{COMP}{SIGN}?(({REAL}"i")|"i"))

/* Quoted forms.  Every ID lexeme also satisfies STRING. */
ID          ("'"{LETTER}+"'")
STRING      ("'"[^']+"'")
MULTISTRING ({MULTS}([^"]+)|{NEWL}+{MULTS})
/*
 * NOTE(review): COMMENT is not referenced by any rule.  It still uses {.} and
 * a '/' trailing-context operator inside a definition, and would need to be
 * rewritten before it could be used.
 */
COMMENT     ({.}+/{HASH})

/* Operator groups reported as a single token class each. */
ARITH_OP    ({PLUS}|{MINUS}|{DIV}|{MULT})
REL_OP      ({MORE_THAN}|{LESS_THAN}|{EQUAL})
LOGIC_OP    ({AND}|{OR})

%%

{SPACE}         {
                    /*
                     * Count one space towards the current line's indentation.
                     * NOTE(review): this rule matches every space, including
                     * spaces in the middle of a line - confirm that is wanted.
                     */
                    INDENT++;
                }
{NEWL}          {
                    /*
                     * At the end of a line, compare its space count (INDENT)
                     * with the count recorded for the previous line (DEDENT)
                     * and report a change in either direction.
                     */
                    if (INDENT > DEDENT) print_token("INDENT");
                    if (INDENT < DEDENT) print_token("DEDENT");
                    DEDENT = INDENT;
                    /*
                     * Start counting afresh for the next line.  Without this
                     * reset INDENT only ever grows, so the DEDENT report
                     * above could never be reached.
                     */
                    INDENT = 0;
                    line++;
                }
{MULTISTRING}   print_token("MULTISTRING");
{ID}            {
                    /*
                     * Must be listed before {STRING}: every ID lexeme also
                     * matches STRING with the same length, and flex breaks
                     * equal-length ties in favour of the earlier rule.
                     */
                    print_token("ID");
                }
{STRING}        print_token("STRING");
{HEX}           print_token("HEX");
{INTEGER}       print_token("INTEGER");
{COMPLEX}       print_token("COMPLEX");
{REAL}          print_token("REAL");
{IF}            print_token("IF");
{ELSE}          print_token("ELSE");
{LAMBDA}        print_token("LAMBDA");
{FOR}           print_token("FOR");
{PASS}          print_token("PASS");
{COLON}         print_token("COLON");
{LPAREN}        print_token("LPAREN");
{RPAREN}        print_token("RPAREN");
{ARITH_OP}      print_token("ARITH_OP");
{REL_OP}        print_token("REL_OP");
{LOGIC_OP}      print_token("LOGIC_OP");
{NOT}           print_token("NOT");
{ASSIGN}        print_token("ASSIGN");
.               {
                    /* Any single character no rule above accepts. */
                    printf("Line %d: Invalid token ('%s').\n", yylineno, yytext);
                }

%%

/*
 * Write one "Found token" report line to standard output.
 *
 * token: name of the token class to report (for example "INTEGER").
 *
 * The line number and lexeme are taken from the scanner's yylineno and
 * yytext at the moment of the call.
 */
static void print_token(const char *token)
{
    const int where = yylineno;
    const char *lexeme = yytext;

    printf("Line %d: Found token %s (lexeme: '%s').\n", where, token, lexeme);
}

/*
 * Program entry point: call the generated scanner function yylex() once,
 * ignore the value it returns, and exit with status 0.
 */
int main(void)
{
    (void)yylex();
    return 0;
}

问题之一是规则中 / 的用法。如果我没记错,在 Lex 和 Flex 中,/ 用于引入尾随上下文(trailing context)。我不太清楚在用到它的那些规则里,它原本想表达什么。另外,至少有一处本该写 {ZERO} 的地方写成了 ZERO}。您似乎想把 {.}+ 当作 [.]+ 或者 .+ 的同义写法(不管怎样,我通常都把它改成了 [^"]+ 或 [^']+)。

COMMENT 的规则仍然包含一个斜线。按照这条规则,# 号标记的似乎是注释的结束(而且它本身不属于注释),而不是注释的开始;作为一种注释约定,这有点不寻常(我认为它实际上无法使用)。

至于我是如何完成这项工作的,我一次只做了一点。

我先把导致编译错误的部分移到了注释中。得到可以编译的版本之后,我就对它进行测试(用它自己的源代码作为输入)。然后我挑出一条有问题的规则,先把原始写法记录下来,以便知道原来的规则是什么,再把它修改成我认为合理的样子。如此反复,一次只走一步。整个过程中我都使用了版本控制系统(VCS,实际上是 git),所以即使犯了严重的错误,我也能恢复到一个“可工作”的版本。你确实在使用 VCS,对吧?

于 2013-04-04T21:36:14.823 回答