3

我使用 Jison 编写了一个解析器,它能够处理带有运算符和布尔运算支持的类似 google 的搜索查询。目前,我很难弄清楚如何在 AND OR 和 NOT 运算符之间接受空格。任何帮助将不胜感激,我在下面附上了一些所需输入/输出的示例。

输入:

  1. 真 && 假 || 真的
  2. ( 真 ) && ( 假 || 真 )
  3. 真&&假||真

结果:

1-3。([真]&&([假]||[真]))

代码:

%lex
%%

/* Lexical Grammar */

"AND"|"&&"          { return "AND" }
"OR"|"||"           { return "OR" }
"NOT"|"!"           { return "NOT" }
"("                 { return "OPEN" }
")"                 { return "CLOSE" }
":"                 { return "QUAL" }
"-"                 { return "DASH" }
"\""|"'"            { return "QUOTE" }
\s+                 { return "SPACE" }
\w+                 { return "WORD" }
"."                 { return "DOT" }
<<EOF>>             { return "EOF" }
.                   { return "INVALID" }
/lex

/* Operators */

%right AND OR
%right NOT
%right QUAL DASH DOT

%start START

%%

/* Language Grammar */

START
    : EXP EOF
        { return $1; }
    ;

EXP
    : EXP AND EXP
        { $$ = "(" + $1 + "&&" + $3 + ")"; }
    | EXP OR EXP
        { $$ = "(" + $1 + "||" + $3 + ")"; }
    | NOT EXP
        { $$ = "(!" + $2 + ")"; }
    | OPEN EXP CLOSE
        { $$ = $2; }
    | ARGS
        { $$ = "[" + $1 + "]"; }
    ;

ARGS
    : ARG SPACE ARGS
        { $$ = [ $1 ].concat($3); }
    | OP SPACE ARGS
        { $$ = [ $1 ].concat($3); }
    | ARG
        { $$ = [ $1 ]; }
    | OP
        { $$ = [ $1 ]; }
    ;

OP
    : DASH OP
        { $$ = "-" + $2; }
    | ARG QUAL ARG
        { $$ = $1 + ":" + $3; }
    ;

ARG
    : DASH ARG
        { $$ = "-" + $2; }
    | QUOTE TERMS QUOTE
        { $$ = $2.join(" "); }
    | TERM
        { $$ = $1; }
    ;

TERMS
    : TERM SPACE TERMS
        { $$ = [ $1 ].concat($3); }
    | TERM
        { $$ = [ $1 ]; }
    ;

TERM
    : TERM DASH TERM
        { $$ = $1 + $2 + $3; }
    | TERM DOT TERM
        { $$ = $1 + $2 + $3; }
    | WORD
        { $$ = $1; }
    ;
4

1 回答 1

2

弄清楚了。我开始忽略空格,更改了一些规则,并解决了冲突。解析器返回一个函数,用于确定某个对象是否与查询匹配。这是最终结果:

/* Google-Like Parser */

/* Lexical Grammar */

%lex
%%

\s+                   { /* ignore whitespace */ }
"AND"|"&&"            { return "AND" }
"OR"|"||"             { return "OR" }
"NOT"|"!"             { return "NOT" }
"("                   { return "OPEN" }
")"                   { return "CLOSE" }
":"                   { return "QUAL" }
"-"                   { return "NEG" }
"\""|"'"              { return "QUOTE" }
\w+                   { return "WORD" }
"."                   { return "DOT" }
<<EOF>>               { return "EOF" }
.                     { return "INVALID" }

/lex

/* Operators */

%right AND OR
%right NOT
%right QUAL NEG DOT

%start START

%%

/* Language Grammar */

START
    : EXP EOF
        { return $1; }
    ;

EXP
    : EXP AND EXP
        { $$ = function(obj) { return ($1(obj) && $3(obj)); }; }
    | EXP OR EXP
        { $$ = function(obj) { return ($1(obj) || $3(obj)); }; }
    | NOT EXP
        { $$ = function(obj) { return !($2(obj)); }; }
    | OPEN EXP CLOSE
        { $$ = $2; }
    | ARGS
        { $$ = function(obj) { return parser.processArgs(obj, $1)(obj); }; }
    ;

ARGS
    : ARG ARGS
        { $$ = [ $1, $2]; }
    | OP ARGS
        { $$ = [ $1, $2]; }
    | ARG
        { $$ = [ $1 ]; }
    | OP
        { $$ = [ $1 ]; }
    ;

OP
    : NEG ARG
        {{
            $2.not = true;
            $$ = $2;
        }}
    | NEG ARG QUAL ARG
        {{
            $$ = {
                "not": true,
                "operator": $2.operand,
                "operand": $4.operand
            };
        }}
    | ARG QUAL ARG
        {{
            $$ = {
                "not": false,
                "operator": $1.operand,
                "operand": $3.operand
            };
        }}
    ;

ARG
    : QUOTE TERMS QUOTE
        {{
            $$ = {
                "not": false,
                "operator": null,
                "operand": $2.join(" ")
            };
        }}
    | TERM
        {{
            $$ = {
                "not": false,
                "operator": null,
                "operand": $1
            };
        }}
    ;

TERMS
    : TERM TERMS
        { $$ = [ $1 ].concat($2); }
    | TERM
        { $$ = [ $1 ]; }
    ;

TERM
    : WORD DOT TERM
        { $$ = $1 + $2 + $3; }
    | WORD
        { $$ = $1; }
    ;

%%

parser.processArgs = function(obj, args) {
    if (args.length > 1)
    {
        if (args[0].operator)
            return function(obj) { return (parser.matchArg(obj, args[0]) && parser.processArgs(args[1])(obj)); };
        else
            return function(obj) { return (parser.matchArg(obj, args[0]) || parser.processArgs(args[1])(obj)); };
    }
    else
    {
        return function(obj) { return parser.matchArg(obj, args[0]); };
    }
}

/* Override Later */
parser.matchArg = function(obj, arg) {
    return true;
}
于 2014-06-25T18:20:10.907 回答