linux - 对 lex 和 yacc 中的小错误的建议

Question

我的 NLP 实现代码又出现了一些问题。在 pro.l 文件中，我定义了一些模式及其对应的动作。pro.l 如下：

%{
/*
 * pro.l - scanner for a small English-to-SQL translator.
 *
 * Each rule below returns a token to the parser defined in pro.y. Rules
 * that carry text (INPUT, FIELD, TABLE) store a strdup'ed copy of the
 * matched lexeme in yylval.
 *
 * Known issue: the catch-all [a-zA-Z]+ rule is listed before the keyword
 * rules. lex selects the longest match and, when two patterns match the
 * same length, the rule that appears first. Every single-word keyword
 * below ("whose", "book", "name", ...) is therefore returned as INPUT and
 * its own rule is never selected. Multi-word patterns such as
 * "is equal to" still win because they match a longer string.
 *
 * NOTE(review): only the tab character is discarded explicitly; spaces and
 * newlines match no rule here and presumably fall through to lex's default
 * action - confirm.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void yyerror(char *str);
%}
%%

[\t] ;
[a-zA-Z]+   { yylval = strdup(yytext) ; return INPUT ; }   /* may be this line is source of error . It has been written so as to include Junk words */
"is equal to"   { return CONDITION ; }
"whose"    { return WHOSE ; }
"by"     { return PREP ; }
"any"    { return QUALIFIER ; }
"our"    { return QUALIFIER ; }
"show"    { return ASK  ; }
"list"    { return ASK ; }
"display us"   { return ASK  ; }
"and"   { return WORD ; } 
"every"  { printf("%s \n", yylval) ; return QUALIFIER  ; }  /* yylval is printed but not assigned in this action */
"of every"  { return QUALIFIER  ; }
"all"  { printf("%s \n", yylval) ; return QUALIFIER  ; } /* yylval is printed but not assigned in this action */
"name"   { yylval = strdup(yytext) ; printf("%s \n", yylval) ; return FIELD ; }
"id"   { yylval = strdup(yytext) ; return FIELD ; }
"the"  { return QUALIFIER  ; }     
"how many"   { return MOJ ; }
"count"   { return MOJ ; }
"book"  { yylval = strdup(yytext) ; printf("%s \n", yylval) ; return TABLE   ; }
"?"   { return EOL ; }
"."   { return EOL ; }
%%

上面给出的是 pro.l，我想包含一个模式：[a-zA-Z]+ action 但它不允许我这样做并给出错误。我想包含这种模式，以便我可以在我的程序中容纳任何垃圾词。

下面给出的是 pro.y

%{
#include <stdio.h>
#include <stdio.h>
#include <string.h>
#define YYSTYPE char *        
extern char *yytext ;

int yylex(void) ;
/* Parser error hook: prints the message handed over by the generated
   parser to standard output, prefixed with "yyerror : ". */
static void yyerror(char *str)
{
    fprintf(stdout, "yyerror : %s ", str);
}
%}

/* Ask the parser generator for detailed syntax-error messages
   ("unexpected X, expecting Y ..."). */
%error-verbose
/* Request verbs: "show", "list", "display us". */
%token ASK
/* Preposition: "by". */
%token PREP
/* Determiners: "any", "our", "every", "of every", "all", "the". */
%token QUALIFIER
/* Counting phrases: "how many", "count". */
%token MOJ
/* Conjunction: "and". */
%token WORD
/* Declared, but neither returned by the pro.l rules nor used in the
   grammar shown here. */
%token TOTAL
/* Sentence terminators: "?" and ".". */
%token EOL 
/* Table name: "book"; carries the matched text as its value. */
%token TABLE
/* Column names: "name", "id"; carry the matched text as their value. */
%token FIELD
/* Relative pronoun: "whose". */
%token WHOSE
/* Comparison phrase: "is equal to". */
%token CONDITION
/* Any other alphabetic word ([a-zA-Z]+); carries the matched text. */
%token INPUT

%%

/* Start symbol: one English request is translated into one SQL statement,
   either a SELECT query or a COUNT query. Every action prints the SQL text
   to standard output. YYSTYPE is char *, so $n is the text the scanner
   stored in yylval for that token. */
translate : select
          | count
          ;

/* SELECT-style requests. */
select : ASK QUALIFIER TABLE EOL    { printf("SELECT DISTINCT * FROM %s",$3) ;}
       | ASK FIELD QUALIFIER TABLE EOL        { printf("SELECT %s  FROM %s",$2,$4) ;}
       | ASK QUALIFIER TABLE FIELD EOL  { printf("SELECT DISTINCT %s FROM %s ",$4 ,$3) ;}
       | ASK QUALIFIER FIELD WORD FIELD QUALIFIER TABLE EOL    { printf("SELECT DISTINCT %s , %s FROM %s", $3, $5, $7) ;}
         /* "... TABLE whose FIELD is equal to INPUT": the INPUT word ($7) is
            the comparison operand, so it is emitted as a quoted literal.
            INPUT matches only letters in pro.l, so it cannot contain a
            quote character. */
       | ASK QUALIFIER TABLE WHOSE FIELD CONDITION INPUT EOL    { printf("SELECT DISTINCT * FROM %s WHERE %s = '%s'", $3, $5, $7); }
         /* Empty alternative: select (and therefore translate) also matches
            no tokens at all. */
       |
       ;

/* COUNT-style requests. */
count : MOJ TABLE EOL   { printf("SELECT COUNT(*) FROM %s", $2) ;}
      | ASK TABLE MOJ EOL    { printf("SELECT COUNT(*) FROM %s", $2); }
        /* This rule contains no token that carries a comparison value, so
           the printed condition ends after "= ". */
      | MOJ TABLE PREP FIELD EOL    {printf("SELECT COUNT(*) FROM %s WHERE %s = ",$2 ,$4) ;}
      ;
%%

#include "lex.yy.c"
extern int yylex() ;
extern int yyparse() ;
extern FILE *yyin ;
/*
 * Entry point: feeds the contents of sample.txt to the parser.
 *
 * Returns 0 once the whole file has been consumed, or -1 when the file
 * cannot be opened.
 */
int main(void)
{
    FILE *myfile = fopen("sample.txt", "r");
    if (!myfile) {
        /* Diagnostics go to stderr so they do not mix with the SQL output. */
        fprintf(stderr, "Can't open the file ");
        return -1;
    }

    yyin = myfile;

    /* yyparse() returns after each parse attempt; keep calling it until
       the scanner has reached end of file. */
    do {
        yyparse();
    } while (!feof(yyin));

    yylex_destroy();
    /* Release the input stream; the original code never closed it. */
    fclose(myfile);
    return 0;
}

请建议我一些方法来容纳当前 lex 文件模式中未表示的垃圾词。

错误 stmts : i/p --> 向我们显示名称等于 xyz 的书。

错误 --> yyerror : syntax error, unexpected INPUT, expecting QUALIFIER or TABLE or FIELD

PS：垃圾词，如：xyz、abc、fgfhg、jhyjg898 等

score 0 · Accepted Answer

您把 [a-zA-Z]+（即乔纳森所说的“包罗万象”（catch-all）规则）放在了第一条。出现歧义时，lex 会选择匹配最长的模式；如果两个模式匹配的字符串长度相同，则选择先出现的那条规则。因此，把包罗万象的规则放在最前面，它就会抢先匹配其他关键字，例如 whose。这就是乔纳森建议把这条规则放在最后的原因：这样所有关键字会优先匹配，只有在它们都不匹配时才会使用 catch-all 规则。

linux - 对 lex 和 yacc 中的小错误的建议

1 回答 1

Related

Reference