我正在使用 Bison 和 Flex 为一种类 C 语言开发编译器。目前,编译器能够识别包含声明、赋值和打印语句以及算术和逻辑表达式的语言(仅支持 int 变量)。它会生成三地址码(3AC)以及一些用于管理内存的指令。这是我的 Bison 代码:
%{
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include "list.h"
/* Scanner entry point; not defined in this file (the surrounding text says Flex is used). */
int yylex();
/* Error reporter; its definition follows the grammar section. */
void yyerror(char *s);
/* Handle passed to and reassigned from insertElement() in main(); the TList type comes from list.h. */
TList list = NULL;
/* Number of temporaries handed out so far; main() reads it to declare t0..t(i-1). */
int i = 0;

/*
 * Return the name of the next temporary ("t0", "t1", ...) and advance the
 * counter.
 *
 * The result is heap-allocated and owned by the caller.  The buffer is sized
 * from the formatted length instead of sizeof(char*), which only happened to
 * be large enough for short names and overflowed once the counter had enough
 * digits.  Allocation or formatting failure terminates the program, since no
 * caller can continue without a name.
 */
char *tmp(void) {
    /* First pass: ask snprintf how many characters the name needs. */
    int len = snprintf(NULL, 0, "t%d", i);
    if (len < 0) {
        fprintf(stderr, "tmp: cannot format temporary name\n");
        exit(EXIT_FAILURE);
    }

    size_t size = (size_t)len + 1; /* +1 for the terminating NUL */
    char *t = malloc(size);
    if (t == NULL) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }

    snprintf(t, size, "t%d", i);
    i++;
    return t;
}
%}
/* Semantic values: NUM carries an int; ID and the expression nonterminals carry a string. */
%union {
int number;
char* identifier;
}
%token <number> NUM
%token <identifier> ID
%token PRINT INT ENDFILE
/*
 * Operator precedence.  In Bison, each later %left/%right/%nonassoc line
 * binds tighter than the ones before it, so as written the arithmetic
 * operators are the LOWEST level and the comparison tokens the highest.
 * NOTE(review): that is the reverse of C's ordering between arithmetic and
 * logical/relational operators; it appears to matter little here only because
 * arithmetic (expr) and logic (bexpr/comp) live in separate nonterminals --
 * confirm before merging them into one expression rule.
 */
%left '+' '-'
%left '*' '/'
%right UMINUS
%left OR
%left AND
%right NOT
%nonassoc EQ LT GT LE GE NE
/* All three expression nonterminals use the <identifier> (char*) member; their actions store a tmp() result or forward an operand's value. */
%type <identifier> expr
%type <identifier> comp
%type <identifier> bexpr
%%
/* A program is a statement list followed by ENDFILE; the action makes yyparse() return 0. */
program : lstmt ENDFILE { return 0; }
;
/*
 * Statement list: ';'-terminated statements and brace-delimited nested lists.
 * Every alternative needs at least one stmt or nested block, so an empty
 * program and an empty "{}" block are not accepted.
 */
lstmt : lstmt stmt ';'
| stmt ';'
| lstmt openb lstmt closeb
| openb lstmt closeb
;
/* '{' : emit a call that reassigns list from insertElement(list). */
openb : '{' { printf("list = insertElement(list);\n"); }
;
/* '}' : emit the matching removeElement(list) call. */
closeb : '}' { printf("list = removeElement(list);\n"); }
;
/*
 * Statements.  Each action prints the C code for the statement to stdout.
 * By the time a statement action runs, the code computing $3 has already been
 * printed by the expr/bexpr actions, so only the final store is emitted here.
 */
/* Declaration without initializer. */
stmt : INT ID { printf("addVar(\"%s\", &list->table);\n", $2); }
/* Declaration with initializer; only a literal NUM is accepted on the right-hand side. */
| INT ID '=' NUM {
printf("addVar(\"%s\", &list->table);\n", $2);
printf("setVarList(\"%s\", %d, list);\n", $2, $4);
}
/* Assignment of an arithmetic expression; $3 is the name of the temporary holding its value. */
| ID '=' expr { printf("setVarList(\"%s\", %s, list);\n", $1, $3); }
/* print(ID): the emitted printf shows "<name>: <value>". */
| PRINT '(' ID ')' { printf("printf(\"%%s: %%d\\n\", \"%s\", getVarList(\"%s\", list));\n", $3, $3); }
/* Assignment of a logical expression; same emitted code as the expr case. */
| ID '=' bexpr { printf("setVarList(\"%s\", %s, list);\n", $1, $3); }
;
/*
 * Logical expressions.  Every operator action allocates a fresh temporary
 * with tmp(), prints "<tmp> = <operands>;" and yields the temporary's name.
 * The mixed alternatives (expr OP bexpr, bexpr OP expr) exist because expr is
 * not itself a bexpr in this grammar; the expr OP expr forms are in comp.
 */
bexpr : bexpr OR bexpr {
$$ = tmp();
printf("%s = %s || %s;\n", $$, $1, $3);
}
| bexpr AND bexpr {
$$ = tmp();
printf("%s = %s && %s;\n", $$, $1, $3);
}
/* Integer operand on the left. */
| expr OR bexpr {
$$ = tmp();
printf("%s = %s || %s;\n", $$, $1, $3);
}
| expr AND bexpr {
$$ = tmp();
printf("%s = %s && %s;\n", $$, $1, $3);
}
/* Integer operand on the right. */
| bexpr OR expr {
$$ = tmp();
printf("%s = %s || %s;\n", $$, $1, $3);
}
| bexpr AND expr {
$$ = tmp();
printf("%s = %s && %s;\n", $$, $1, $3);
}
| NOT bexpr {
$$ = tmp();
printf("%s = !%s;\n", $$, $2);
}
/* Parentheses and comp just forward the inner temporary's name; no code is emitted. */
| '(' bexpr ')' { $$ = $2; }
| comp { $$ = $1; }
;
/*
 * Comparisons between two arithmetic expressions, plus the logical operators
 * applied directly to integer operands (expr AND expr, expr OR expr,
 * NOT expr).  Each action stores the result in a fresh temporary and yields
 * its name.
 */
comp : expr LT expr {
$$ = tmp();
printf("%s = %s < %s;\n", $$, $1, $3);
}
| expr LE expr {
$$ = tmp();
printf("%s = %s <= %s;\n", $$, $1, $3);
}
| expr GT expr {
$$ = tmp();
printf("%s = %s > %s;\n", $$, $1, $3);
}
| expr GE expr {
$$ = tmp();
printf("%s = %s >= %s;\n", $$, $1, $3);
}
| expr EQ expr {
$$ = tmp();
printf("%s = %s == %s;\n", $$, $1, $3);
}
| expr NE expr {
$$ = tmp();
printf("%s = %s != %s;\n", $$, $1, $3);
}
/* Logical operators with integer operands on both sides. */
| expr AND expr {
$$ = tmp();
printf("%s = %s && %s;\n", $$, $1, $3);
}
| expr OR expr {
$$ = tmp();
printf("%s = %s || %s;\n", $$, $1, $3);
}
| NOT expr {
$$ = tmp();
printf("%s = !%s;\n", $$, $2);
}
;
/*
 * Arithmetic expressions.  Each action emits one three-address instruction
 * into a fresh temporary and yields that temporary's name; ambiguity between
 * the binary operators is resolved by the precedence declarations.
 */
expr : expr '+' expr {
$$ = tmp();
printf("%s = %s + %s;\n", $$, $1, $3);
}
| expr '-' expr {
$$ = tmp();
printf("%s = %s - %s;\n", $$, $1, $3);
}
| expr '*' expr {
$$ = tmp();
printf("%s = %s * %s;\n", $$, $1, $3);
}
| expr '/' expr {
$$ = tmp();
printf("%s = %s / %s;\n", $$, $1, $3);
}
/* Parentheses forward the inner temporary's name; no code is emitted. */
| '(' expr ')' { $$ = $2; }
/* Unary minus takes its precedence from the UMINUS pseudo-token rather than from binary '-'. */
| '-' expr %prec UMINUS {
$$ = tmp();
printf("%s = -%s;\n", $$, $2);
}
/* Variable read: load the value through getVarList into a temporary. */
| ID {
$$ = tmp();
printf("%s = getVarList(\"%s\", list);\n", $$, $1);
}
/* Integer literal: copy it into a temporary. */
| NUM {
$$ = tmp();
printf("%s = %d;\n", $$, $1);
}
;
%%
/*
 * Driver: run the parser (which prints the generated code to stdout), then
 * write temp.h with the declarations the generated code relies on -- the
 * global list and one int per temporary handed out by tmp().
 *
 * temp.h is opened once, checked, written in order and closed.  The original
 * opened a new stream for every fprintf and never closed any of them, which
 * leaked one FILE per temporary and left the order of the file's contents to
 * the order in which the independent buffers were flushed at exit.
 *
 * Returns 0 once temp.h has been written (also after a parse error, as
 * before), or EXIT_FAILURE if temp.h cannot be created or flushed.
 */
int main(void) {
    list = insertElement(list);
    if (yyparse() != 0)
        fprintf(stderr, "Abonormal exit\n");

    FILE *out = fopen("temp.h", "w");
    if (out == NULL) {
        perror("temp.h");
        return EXIT_FAILURE;
    }

    fprintf(out, "#include \"list.h\"\n\nTList list = NULL;\n");

    /* With no temporaries there is nothing to declare; the original emitted the invalid "int t-1;". */
    if (i > 0) {
        fprintf(out, "int t");
        for (int j = 0; j < i - 1; j++) {
            fprintf(out, "%d, t", j);
        }
        fprintf(out, "%d;", i - 1);
    }

    /* fclose flushes the buffer, so a failure here means the header is incomplete. */
    if (fclose(out) != 0) {
        perror("temp.h");
        return EXIT_FAILURE;
    }
    return 0;
}
/* Report a parser diagnostic: writes "Error: <s>" and a newline to stderr. */
void yyerror(char *s)
{
    FILE *const sink = stderr;

    fprintf(sink, "Error: %s\n", s);
}
如您所见,逻辑表达式的语法有些繁琐,但编译器的行为符合预期。其语义与 C 类似,因此整数值也可以直接作为 AND/OR/NOT 的操作数。
我对语法的想法是这样的:
/*
 * Proposed simplification: let a plain expr stand as a bexpr, so the mixed
 * expr/bexpr alternatives are no longer needed.
 * NOTE(review): the two reported conflicts presumably come from the new
 * "bexpr : expr" unit rule -- confirm with the `bison -v` report:
 *   - after "'(' expr", a ')' can either be shifted ("'(' expr ')'") or
 *     trigger the reduction expr -> bexpr (for "'(' bexpr ')'"): shift/reduce;
 *   - after "ID '=' expr", the parser can reduce the statement "ID '=' expr"
 *     or first reduce expr -> bexpr for "ID '=' bexpr": reduce/reduce.
 */
bexpr : bexpr OR bexpr
| bexpr AND bexpr
| NOT bexpr
| '(' bexpr ')'
| comp
| expr
;
/* Only the six relational operators remain; the logical operators on integer operands are covered by "bexpr : expr". */
comp : expr LT expr
| expr LE expr
| expr GT expr
| expr GE expr
| expr EQ expr
| expr NE expr
;
但是这样写会产生两个冲突:1 个 shift/reduce 冲突和 1 个 reduce/reduce 冲突。有没有办法在不引入冲突的前提下简化语法?