1

最近在这里问了一个问题: Boost Spirit Segfault In Parser

在这篇文章中,有人指出我正在使用的语法绝对是左递归的,并且精神是一个 PEG 解析器生成器,这意味着左递归是不可能的。

我使用 Dragon Book 中处理左递归部分的规则将语法转换为非左递归语法。

给定一个左递归文法

A -> A >> alpha | beta

可以通过执行以下操作将其转换为等效的右递归文法:

A -> beta >> A'

A' -> alpha >> A' | epsilon 

这是生成的解析器,我认为是非左递归产生:

namespace interpreter {
namespace qi = boost::spirit::qi;
template <typename Iterator, typename Skipper>
struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
{           
    template <typename TokenDef>
    InterpreterGrammar(TokenDef const& tok)
        : InterpreterGrammar::base_type(start)
    {
        using boost::phoenix::ref;

        start %= functionList >> endList >> qi::eoi;

        // different expressions
        exp %= 
               qi::token(k_alphaTerminal) >> qi::token(k_equalTo) >> qi::token(k_alphaTerminal) >> expPrime
               |
               qi::token(k_numericTerminal) >> expPrime
               |
               qi::token(k_trueTok) >> expPrime
               |
               qi::token(k_falseTok) >> expPrime;

        expPrime %=     
               qi::token(k_equalTo) >> exp >> expPrime
               |
               qi::token(k_notEq) >> exp >> expPrime
               |
               qi::token(k_less) >> exp >> expPrime
               |
               qi::token(k_lessEq) >> exp >> expPrime
               |
               qi::token(k_greater) >> exp >> expPrime
               |
               qi::token(k_greaterEq) >> exp >> expPrime
               |
               qi::token(k_andTok) >> exp >> expPrime
               |
               qi::token(k_orTok) >> exp >> expPrime
               |
               qi::token(k_notTok) >> exp 
               |
               qi::token(k_plues) >> exp >> expPrime
               |
               qi::token(k_minus) >> exp >> expPrime
               |
               qi::token(k_mult) >> exp >> expPrime
               |
               qi::token(k_minus) >> exp
               |
               qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
               |
               qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket) 
               |
               qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
               |
               qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
               | 
               qi::eps;

        // parameter list
        paramList %= exp >> paramListPrime;

        paramListPrime %= qi::token(k_comma) >> exp >> paramListPrime
                          |
                          qi::eps;

        // return statements
        returnStatement %= qi::token(k_returnTok) >> exp
                           |
                           qi::token(k_returnTok);

        // function call statements
        callStatement %= qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
                         |
                         qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> paramList >> qi::token(k_rightParen);

        // variable assignment
        assignmentStatement %= qi::token(k_alphaTerminal) >> qi::token(k_assign) >> exp
                               |
                               qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp
                                   >> qi::token(k_rightBracket) >> qi::token(k_assign) >> exp;

        // list of integers
        intList %= qi::token(k_numericTerminal) >> intListPrime;

        intListPrime %= 
                  qi::token(k_comma) >> qi::token(k_numericTerminal) >> intListPrime
                  |
                  qi::eps;

        // print out a variable
        printStatement %= qi::token(k_print) >> exp;

        // take input
        inputStatement %= qi::token(k_alphaTerminal) >> qi::token(k_input);

        // conditional statement
        conditionStatement %= qi::token(k_ifTok) >> exp >> qi::token(k_colon) >> statements >> optionalElse;

        // consitions have optional else
        optionalElse %= qi::token(k_elseTok) >> qi::token(k_colon) >> statements
                        |
                        qi::eps;

        // while loop
        whileStatement %= qi::token(k_whileTok) >> exp >> qi::token(k_colon) >> statements >> qi::token(k_elihw);

        // actual program statements
        endList %= end >> endListPrime;

        endListPrime %= end >> endListPrime
                        |
                        qi::eps;

        // end possibilities of program in global space
        end %= callStatement
               |
               printStatement
               |
               qi::token(k_alphaTerminal) >> qi::token(k_assign) >> qi::token(k_input)
               |
               qi::token(k_alphaTerminal) >> qi::token(k_assign) >> exp
               |
               qi::token(k_alphaTerminal) >> qi::token(k_assign) >> qi::token(k_leftBracket) >> intList
                   >> qi::token(k_rightBracket)
               |
               qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
                   >> qi::token(k_assign) >> exp;

        // function parameters
        param %=
                qi::token(k_alphaTerminal) >> paramPrime
                |
                qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> qi::token(k_rightBracket)
                    >> paramPrime;

        // for handling left recursion in paramlist
        paramPrime %= 
                    qi::token(k_comma) >> qi::token(k_alphaTerminal) >> paramPrime
                    | 
                    qi::eps;


        // define a statement as assignment print input condition while or call
        statement %= 
                    assignmentStatement
                    |
                    printStatement
                    |
                    inputStatement
                    |
                    conditionStatement
                    |
                    whileStatement
                    |
                    callStatement
                    |
                    returnStatement;

        // general statement list
        statements %= statement >> statementsPrime;

        // for handling left recursion in statements
        statementsPrime %= statement >> statementsPrime
                           |
                           qi::eps;

        // functions
        functionList %= qi::token(k_def) >> qi::token(k_alphaTerminal) >> qi::token(k_leftParen)
                            >> param >> qi::token(k_rightParen) >> qi::token(k_colon)
                            >> statements >> qi::token(k_fed)
                        |
                        qi::token(k_def) >> qi::token(k_alphaTerminal) >> qi::token(k_leftParen)
                            >> qi::token(k_rightParen) >> qi::token(k_colon) >> statements >> qi::token(k_fed)
                        | qi::eps;

        BOOST_SPIRIT_DEBUG_NODES((start)(functionList));
        debug(start);
    }
    qi::rule<Iterator, Skipper> start;
    qi::rule<Iterator, Skipper> functionList;
    qi::rule<Iterator, Skipper> endList;
    qi::rule<Iterator, Skipper> endListPrime;
    qi::rule<Iterator, Skipper> param;
    qi::rule<Iterator, Skipper> paramPrime;
    qi::rule<Iterator, Skipper> paramList;
    qi::rule<Iterator, Skipper> paramListPrime;
    qi::rule<Iterator, Skipper> statements;
    qi::rule<Iterator, Skipper> statementsPrime;
    qi::rule<Iterator, Skipper> statement;
    qi::rule<Iterator, Skipper> assignmentStatement;
    qi::rule<Iterator, Skipper> printStatement;
    qi::rule<Iterator, Skipper> inputStatement;
    qi::rule<Iterator, Skipper> conditionStatement;
    qi::rule<Iterator, Skipper> whileStatement;
    qi::rule<Iterator, Skipper> callStatement;
    qi::rule<Iterator, Skipper> returnStatement;
    qi::rule<Iterator, Skipper> exp;
    qi::rule<Iterator, Skipper> expPrime;
    qi::rule<Iterator, Skipper> intList;
    qi::rule<Iterator, Skipper> intListPrime;
    qi::rule<Iterator, Skipper> optionalElse;
    qi::rule<Iterator, Skipper> end;
};

}

这是词法分析器

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <iostream>
#include <fstream>
#include <streambuf>

#include <boost/bind.hpp>
#include <boost/ref.hpp>

namespace interpreter
{
    namespace lex = boost::spirit::lex;

    enum Tokens
    {
        k_andTok = 1,
        k_def = 2,
        k_elihw = 3,
        k_elseTok = 4,
        k_falseTok = 5,
        k_fed = 6,
        k_fi = 7,
        k_ifTok = 8,
        k_input = 9,
        k_notTok = 10,
        k_orTok = 11,
        k_print = 12,
        k_returnTok = 13,
        k_trueTok = 14,
        k_whileTok = 15,
        k_plues = 16,
        k_minus = 17,
        k_mult = 18,
        k_div = 19,
        k_bang = 20,
        k_equalTo = 21,
        k_greaterEq = 22,
        k_lessEq = 23,
        k_notEq = 24,
        k_less = 25,
        k_greater = 26,
        k_assign = 27,
        k_comma = 28,
        k_colon = 29,
        k_leftParen = 30,
        k_rightParen = 31,
        k_leftBracket = 32,
        k_rightBracket = 33,
        k_alphaTerminal = 34,
        k_numericTerminal = 35
    };

    template <typename Lexer>
    struct LexerTokens : lex::lexer<Lexer>
    {
        LexerTokens() :
           whiteSpace("[ \\t\\n]"),
           andTok("and"),
           def("def"),
           elihw("elihw"),
           elseTok("else"),
           falseTok("false"),
           fed("fed"),
           fi("fi"),
           ifTok("if"),
           input("input"),
           notTok("not"),
           orTok("or"),
           print("print"),
           returnTok("return"),
           trueTok("true"),
           whileTok("while"),
           plus("\\+"),
           minus("\\-"),
           mult("\\*"),
           div("\\/"),
           bang("\\!"),
           equalTo("=="),
           greaterEq(">="),
           lessEq("<="),
           notEq("!="),
           less("<"),
           greater(">"),
           assign("="),
           comma(","),
           colon(":"),
           leftParen("\\("),
           rightParen("\\)"),
           leftBracket("\\["),
           rightBracket("\\["),
           alphaTerminal("[a-z][a-zA-Z0-9]*"),
           numericTerminal("[0-9]*")
        {
            this->self("WHITESPACE") = whiteSpace;

            this->self.add
                (andTok, k_andTok)
                (def, k_def)
                (elihw, k_elihw)
                (elseTok, k_elseTok)
                (falseTok, k_falseTok)
                (fed, k_fed)
                (fi, k_fi)
                (ifTok, k_ifTok)
                (andTok, k_andTok)
                (input, k_input)
                (notTok, k_notTok)
                (orTok, k_orTok)
                (print, k_print)
                (returnTok, k_returnTok)
                (trueTok, k_trueTok)
                (whileTok, k_whileTok)
                (plus, k_plues)
                (minus, k_minus)
                (mult, k_mult)
                (div, k_div)
                (bang, k_bang)
                (equalTo, k_equalTo)
                (greaterEq, k_greaterEq)
                (lessEq, k_lessEq)
                (notEq, k_notEq)
                (less, k_less)
                (greater, k_greater)
                (assign, k_assign)
                (comma, k_comma)
                (colon, k_colon)
                (leftParen, k_leftParen)
                (rightParen, k_rightParen)
                (leftBracket, k_leftBracket)
                (rightBracket, k_rightBracket)
                (alphaTerminal, k_alphaTerminal)
                (numericTerminal, k_numericTerminal);
        }

        lex::token_def<lex::omit> whiteSpace;
        lex::token_def<std::string> andTok;
        lex::token_def<std::string> def;
        lex::token_def<std::string> elihw;
        lex::token_def<std::string> elseTok;
        lex::token_def<std::string> falseTok;
        lex::token_def<std::string> fed;
        lex::token_def<std::string> fi;
        lex::token_def<std::string> ifTok;
        lex::token_def<std::string> input;
        lex::token_def<std::string> notTok;
        lex::token_def<std::string> orTok;
        lex::token_def<std::string> print;
        lex::token_def<std::string> returnTok;
        lex::token_def<std::string> trueTok;
        lex::token_def<std::string> whileTok;
        lex::token_def<std::string> plus;
        lex::token_def<std::string> minus;
        lex::token_def<std::string> mult;
        lex::token_def<std::string> div;
        lex::token_def<std::string> bang;
        lex::token_def<std::string> equalTo;
        lex::token_def<std::string> greaterEq;
        lex::token_def<std::string> lessEq;
        lex::token_def<std::string> notEq;
        lex::token_def<std::string> less;
        lex::token_def<std::string> greater;
        lex::token_def<std::string> assign;
        lex::token_def<std::string> comma;
        lex::token_def<std::string> colon;
        lex::token_def<std::string> leftParen;
        lex::token_def<std::string> rightParen;
        lex::token_def<std::string> leftBracket;
        lex::token_def<std::string> rightBracket;
        lex::token_def<std::string> alphaTerminal;
        lex::token_def<std::string> numericTerminal;
    };

这是我的示例测试程序

    int main(int argc, char** argv)
    {
        namespace lex = boost::spirit::lex;
        namespace qi = boost::spirit::qi;

        typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
        typedef lex::lexertl::lexer<token_type> lexer_type;
        typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
        typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;

        interpreter::LexerTokens< lexer_type > lexer;
        interpreter::InterpreterGrammar< iterator_type, skipper_type > parser(lexer);
        std::string sourceCode("def print_it(x, y): print 3*x + y return fed print_it(8,1) x = 3 print_it(x, x)"); 

        char const* first = sourceCode.c_str();
        char const* last = &first[sourceCode.size()];
        bool r = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);

        std::cout << "Remaining " << std::string(first,last) << std::endl;
        std::cout << "R is " << r << std::endl;
    }

这些修改引起了一些问题,首先,通过非正式地查看语法,没有构建完整的 LL(1) 解析表,我不相信这个语法是 LL(1)。我仍然需要验证这一点,但我想知道精神,能够解析这个语法吗?我知道 PEG 通常使用 / 运算符来进行前瞻,spirit 目前是否这样做?我在另一篇文章中看到精神可能没有?

其次,这个语法失败了,我注意到,在开始生产时,我简化了开始生产并使其成为:

start %= functionList;

然后将输入更改为:

def print_it(x, y): print 3*x + y return fed

语法调试语句表明解析成功。但是,我看到剩下的字符串是:

print_it(x, y): print 3*x + y return fed

所以实际上只解析了第一个令牌。经过一些调试后,我不确定为什么解析成功以及为什么只使用一个符号,这可能是词法分析器的问题吗?

此外,当我将开始生产更改为:

start %= endList;

并使用输入

y = x

然而,这无法解析并且只消耗字符 y。

最后,我的调试语句的输出不是很有帮助,当使用调试语句运行时,产生的输出是:

<start>
  <try>[][][][][][][][][][][][][][][][][][][][]</try>
  <fail/>
</start>
Remaining  print_it(x, y): print 3*x + y return fed print_it(8,1) x = 3 print_it(x, x)
R is 0

我假设这意味着在语法中尝试了 20 个产生式,从 20 个空 [],这是一个正确的假设吗?还有为什么 [] 是空的?我通常会看到它们带有一些对调试有用的文本。是不是因为正则表达式匹配,语法就自动成功了?如果是这种情况,有没有办法让调试语句在使用令牌枚举令牌而不是使用宏添加表达式时打印有用的输出?

感谢您提供正确方向的任何帮助或点,谢谢。

4

1 回答 1

2

我假设这意味着在语法中尝试了 20 个产生式,从 20 个空 [],这是一个正确的假设吗?

否。[]指示输入令牌。

还有为什么 [] 是空的?

可能没有有用的方法来打印它们,因此它们显示为空。

如果是这种情况,有没有办法让调试语句在使用令牌枚举令牌而不是使用宏添加表达式时打印有用的输出?

我会这么认为。但我从不使用 Lex。因此,可能需要一段时间才能弄清楚。

首先引起我注意的是:

typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;

你的 AttributeTypes 说omit。确实,改为

typedef lex::lexertl::token< char const*, boost::mpl::vector<lex::omit, std::string>, boost::mpl::true_ > token_type;

确实显示出生命迹象。对于输入x=y(没有空格!)它打印:

Live On Coliru

<start>
  <try>[y][=][x][][][][][][][][][][][][][][][][][]</try>
  <fail/>
</start>
Remaining 

R is false

现在,对于def print_it(x, y): print 3*x + y return fed输入,输出仍然是:

<start>
  <try>[def][][][][][][][][][][][][][][][][][][][]</try>
  <fail/>
</start>
Remaining  print_it(x, y): print 3*x + y return fed

R is false

信息量稍大一些。有趣的是,它似乎在第一个空格上也失败了。正whiteSpace则表达式看起来不错,所以我在答案中搜索lex in_state以记住我曾经学到的关于跳过和 Lex 的知识。

我玩弄了一些建议。在第二篇文章之后,我看到了这条评论:

好的!您可以添加到您的答案中,在词法分析器中有一个用于跳过空格的单独状态有另一个相当大的缺点:缺乏调试支持。如果您使用单独的状态进行跳过,然后使用 BOOST_SPIRIT_DEBUG_NODE,您将无法获得令牌流的完整输出。这是因为默认调试处理程序推进词法分析器迭代器以获取令牌,词法分析器当然处于初始状态。一旦遇到空格,迭代就会停止,因为词法分析器找不到匹配项。令牌流将在规则跟踪中的第一个空白处被剪切。– noxmetus 2017 年 9 月 20 日 18:28

让我们,只是为了好玩,尝试不使用单独的状态,而是使用Troubles 中的带有 boost::spirit::lex & whitespace的状态。

现在,表达式不会解析,因为

  • print_it不是有效的标识符。让我们添加_alphaTerminal
  • alphaTerminal需要=遵循。稍微修正一下表达式:

    exp = (
                qi::token(k_alphaTerminal)
              | qi::token(k_numericTerminal)
              | qi::token(k_trueTok)
              | qi::token(k_falseTok))
        >> expPrime
        ;
    
  • 事实上,这些令牌 ID 无效。您需要从min_token_id(即 0x10000)开始:

    enum Tokens
    {
        k_andTok = lex::tokenids::min_token_id + 1,
        k_def, k_elihw, k_elseTok, k_falseTok, k_fed, k_fi, k_ifTok, k_input,
        k_notTok, k_orTok, k_print, k_returnTok, k_trueTok, k_whileTok,
        k_plues, k_minus, k_mult, k_div, k_bang, k_equalTo, k_greaterEq,
        k_lessEq, k_notEq, k_less, k_greater, k_assign, k_comma, k_colon,
        k_leftParen, k_rightParen, k_leftBracket, k_rightBracket,
        k_alphaTerminal, k_numericTerminal,
    };
    
  • 为什么还要指定令牌 ID?从您将令牌定义的引用传递给解析器这一事实猜测,我认为您会使用它们吗?

    exp
        = (tok.alphaTerminal | tok.numericTerminal | tok.trueTok | tok.falseTok) >> expPrime
        ;
    
  • 另外,让我们为所有规则启用调试:

    BOOST_SPIRIT_DEBUG_NODES(
        (start) (functionList) (endList) (endListPrime) (param)
        (paramPrime) (paramList) (paramListPrime) (statements)
        (statementsPrime) (statement) (assignmentStatement)
        (printStatement) (inputStatement) (conditionStatement)
        (whileStatement) (callStatement) (returnStatement) (exp)
        (expPrime) (intList) (intListPrime) (optionalElse) (end)
    )
    

而且,在花了太多时间试图理解为什么状态切换的船长似乎不起作用之后,我放弃了。这是我修修补补的结果:

Live On Coliru

//#define USE_STATES
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/lex.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/phoenix.hpp>
namespace lex = boost::spirit::lex;

namespace interpreter {
    enum Tokens
    {
        k_andTok = lex::tokenids::min_token_id + 1,
        k_def, k_elihw, k_elseTok, k_falseTok, k_fed, k_fi, k_ifTok, k_input,
        k_notTok, k_orTok, k_print, k_returnTok, k_trueTok, k_whileTok,
        k_plues, k_minus, k_mult, k_div, k_bang, k_equalTo, k_greaterEq,
        k_lessEq, k_notEq, k_less, k_greater, k_assign, k_comma, k_colon,
        k_leftParen, k_rightParen, k_leftBracket, k_rightBracket,
        k_alphaTerminal, k_numericTerminal,
    };

    template <typename Lexer>
    struct LexerTokens : lex::lexer<Lexer>
    {
        LexerTokens() :
           whiteSpace("[ \\t\\n]"),
           andTok("and"),
           def("def"),
           elihw("elihw"),
           elseTok("else"),
           falseTok("false"),
           fed("fed"),
           fi("fi"),
           ifTok("if"),
           input("input"),
           notTok("not"),
           orTok("or"),
           print("print"),
           returnTok("return"),
           trueTok("true"),
           whileTok("while"),
           plus("\\+"),
           minus("\\-"),
           mult("\\*"),
           div("\\/"),
           bang("\\!"),
           equalTo("=="),
           greaterEq(">="),
           lessEq("<="),
           notEq("!="),
           less("<"),
           greater(">"),
           assign("="),
           comma(","),
           colon(":"),
           leftParen("\\("),
           rightParen("\\)"),
           leftBracket("\\["),
           rightBracket("\\["),
           alphaTerminal("[a-z][a-zA-Z0-9_]*"),
           numericTerminal("[0-9]*")
        {
            this->self.add
                (andTok, k_andTok)
                (def, k_def)
                (elihw, k_elihw)
                (elseTok, k_elseTok)
                (falseTok, k_falseTok)
                (fed, k_fed)
                (fi, k_fi)
                (ifTok, k_ifTok)
                (andTok, k_andTok)
                (input, k_input)
                (notTok, k_notTok)
                (orTok, k_orTok)
                (print, k_print)
                (returnTok, k_returnTok)
                (trueTok, k_trueTok)
                (whileTok, k_whileTok)
                (plus, k_plues)
                (minus, k_minus)
                (mult, k_mult)
                (div, k_div)
                (bang, k_bang)
                (equalTo, k_equalTo)
                (greaterEq, k_greaterEq)
                (lessEq, k_lessEq)
                (notEq, k_notEq)
                (less, k_less)
                (greater, k_greater)
                (assign, k_assign)
                (comma, k_comma)
                (colon, k_colon)
                (leftParen, k_leftParen)
                (rightParen, k_rightParen)
                (leftBracket, k_leftBracket)
                (rightBracket, k_rightBracket)
                (alphaTerminal, k_alphaTerminal)
                (numericTerminal, k_numericTerminal);

#ifdef USE_STATES
            this->self("WHITESPACE") = whiteSpace;
#else
            this->self += whiteSpace [ lex::_pass = lex::pass_flags::pass_ignore ];
#endif
        }

        lex::token_def<lex::omit> whiteSpace;
        lex::token_def<std::string> andTok;
        lex::token_def<std::string> def;
        lex::token_def<std::string> elihw;
        lex::token_def<std::string> elseTok;
        lex::token_def<std::string> falseTok;
        lex::token_def<std::string> fed;
        lex::token_def<std::string> fi;
        lex::token_def<std::string> ifTok;
        lex::token_def<std::string> input;
        lex::token_def<std::string> notTok;
        lex::token_def<std::string> orTok;
        lex::token_def<std::string> print;
        lex::token_def<std::string> returnTok;
        lex::token_def<std::string> trueTok;
        lex::token_def<std::string> whileTok;
        lex::token_def<std::string> plus;
        lex::token_def<std::string> minus;
        lex::token_def<std::string> mult;
        lex::token_def<std::string> div;
        lex::token_def<std::string> bang;
        lex::token_def<std::string> equalTo;
        lex::token_def<std::string> greaterEq;
        lex::token_def<std::string> lessEq;
        lex::token_def<std::string> notEq;
        lex::token_def<std::string> less;
        lex::token_def<std::string> greater;
        lex::token_def<std::string> assign;
        lex::token_def<std::string> comma;
        lex::token_def<std::string> colon;
        lex::token_def<std::string> leftParen;
        lex::token_def<std::string> rightParen;
        lex::token_def<std::string> leftBracket;
        lex::token_def<std::string> rightBracket;
        lex::token_def<std::string> alphaTerminal;
        lex::token_def<std::string> numericTerminal;
    };

    namespace qi = boost::spirit::qi;
    template <typename Iterator, typename Skipper>
    struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
    {
        template <typename TokenDef>
        InterpreterGrammar(TokenDef const& tok)
            : InterpreterGrammar::base_type(start)
        {
            start
                = functionList >> endList >> qi::eoi
                ;

            // different expressions
            exp
                = (tok.alphaTerminal | tok.numericTerminal | tok.trueTok | tok.falseTok) >> expPrime
                ;

            expPrime
                = tok.equalTo   >> exp >> expPrime
                | tok.notEq     >> exp >> expPrime
                | tok.less      >> exp >> expPrime
                | tok.lessEq    >> exp >> expPrime
                | tok.greater   >> exp >> expPrime
                | tok.greaterEq >> exp >> expPrime
                | tok.andTok    >> exp >> expPrime
                | tok.orTok     >> exp >> expPrime
                | tok.notTok    >> exp
                | tok.plus      >> exp >> expPrime
                | tok.minus     >> exp >> expPrime
                | tok.mult      >> exp >> expPrime
                | tok.minus >> exp
                | tok.leftParen >> exp >> tok.rightParen
                | tok.alphaTerminal >> tok.leftBracket >> exp >> tok.rightBracket
                | tok.alphaTerminal >> tok.leftParen >> tok.rightParen
                | tok.alphaTerminal >> tok.leftParen >> exp >> tok.rightParen
                | qi::eps
                ;

            // parameter list
            paramList
                = exp >> paramListPrime
                ;

            paramListPrime
                = tok.comma >> exp >> paramListPrime
                | qi::eps
                ;

            // return statements
            returnStatement
                = tok.returnTok >> exp
                | tok.returnTok
                ;

            // function call statements
            callStatement
                = tok.alphaTerminal >> tok.leftParen >> tok.rightParen
                | tok.alphaTerminal >> tok.leftParen >> paramList >> tok.rightParen
                ;

            // variable assignment
            assignmentStatement
                = tok.alphaTerminal >> tok.assign >> exp
                | tok.alphaTerminal >> tok.leftBracket >> exp
                    >> tok.rightBracket >> tok.assign >> exp
                ;

            // list of integers
            intList
                = tok.numericTerminal >> intListPrime
                ;

            intListPrime
                = tok.comma >> tok.numericTerminal >> intListPrime
                | qi::eps
                ;

            // print out a variable
            printStatement
                = tok.print >> exp
                ;

            // take input
            inputStatement
                = tok.alphaTerminal >> tok.input
                ;

            // conditional statement
            conditionStatement
                = tok.ifTok >> exp >> tok.colon >> statements >> optionalElse
                ;

            // consitions have optional else
            optionalElse
                = tok.elseTok >> tok.colon >> statements
                | qi::eps
                ;

            // while loop
            whileStatement
                = tok.whileTok >> exp >> tok.colon >> statements >> tok.elihw
                ;

            // actual program statements
            endList
                = end >> endListPrime
                ;

            endListPrime
                = end >> endListPrime
                | qi::eps
                ;

            // end possibilities of program in global space
            end
                = callStatement
                | printStatement
                | tok.alphaTerminal >> tok.assign >> tok.input
                | tok.alphaTerminal >> tok.assign >> exp
                | tok.alphaTerminal >> tok.assign >> tok.leftBracket >> intList
                    >> tok.rightBracket
                | tok.alphaTerminal >> tok.leftBracket >> exp >> tok.rightBracket
                    >> tok.assign >> exp
                ;

            // function parameters
            param
                = tok.alphaTerminal >> paramPrime
                | tok.alphaTerminal >> tok.leftBracket >> tok.rightBracket
                    >> paramPrime
                ;

            // for handling left recursion in paramlist
            paramPrime
                = tok.comma >> tok.alphaTerminal >> paramPrime
                | qi::eps
                ;

            // define a statement as assignment print input condition while or call
            statement
                = assignmentStatement
                | printStatement
                | inputStatement
                | conditionStatement
                | whileStatement
                | callStatement
                | returnStatement
                ;

            // general statement list
            statements
                = statement >> statementsPrime
                ;

            // for handling left recursion in statements
            statementsPrime
                = statement >> statementsPrime
                | qi::eps
                ;

            // functions
            functionList
                = tok.def >> tok.alphaTerminal >> tok.leftParen
                    >> param >> tok.rightParen >> tok.colon
                    >> statements >> tok.fed
                | tok.def >> tok.alphaTerminal >> tok.leftParen
                    >> tok.rightParen >> tok.colon >> statements >> tok.fed
                | qi::eps
                ;

            BOOST_SPIRIT_DEBUG_NODES(
                (start) (functionList) (endList) (endListPrime) (param)
                (paramPrime) (paramList) (paramListPrime) (statements)
                (statementsPrime) (statement) (assignmentStatement)
                (printStatement) (inputStatement) (conditionStatement)
                (whileStatement) (callStatement) (returnStatement) (exp)
                (expPrime) (intList) (intListPrime) (optionalElse) (end)
            )

        }
      private:
        qi::rule<Iterator, Skipper> start;
        qi::rule<Iterator, Skipper> functionList;
        qi::rule<Iterator, Skipper> endList;
        qi::rule<Iterator, Skipper> endListPrime;
        qi::rule<Iterator, Skipper> param;
        qi::rule<Iterator, Skipper> paramPrime;
        qi::rule<Iterator, Skipper> paramList;
        qi::rule<Iterator, Skipper> paramListPrime;
        qi::rule<Iterator, Skipper> statements;
        qi::rule<Iterator, Skipper> statementsPrime;
        qi::rule<Iterator, Skipper> statement;
        qi::rule<Iterator, Skipper> assignmentStatement;
        qi::rule<Iterator, Skipper> printStatement;
        qi::rule<Iterator, Skipper> inputStatement;
        qi::rule<Iterator, Skipper> conditionStatement;
        qi::rule<Iterator, Skipper> whileStatement;
        qi::rule<Iterator, Skipper> callStatement;
        qi::rule<Iterator, Skipper> returnStatement;
        qi::rule<Iterator, Skipper> exp;
        qi::rule<Iterator, Skipper> expPrime;
        qi::rule<Iterator, Skipper> intList;
        qi::rule<Iterator, Skipper> intListPrime;
        qi::rule<Iterator, Skipper> optionalElse;
        qi::rule<Iterator, Skipper> end;
    };
}

#include <fstream>
#include <iterator>

int main(int argc, char** argv) {
    namespace lex = boost::spirit::lex;
    namespace qi = boost::spirit::qi;

    typedef lex::lexertl::token<char const*, boost::mpl::vector<lex::omit, std::string>, boost::mpl::true_> token_type;
#ifdef USE_STATES
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;
    typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
#else
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
    typedef qi::unused_type skipper_type;
#endif

    interpreter::LexerTokens<lexer_type> lexer;
    interpreter::InterpreterGrammar<iterator_type, skipper_type> parser(lexer);

    // read the file
    if (argc != 2)
    {
        std::cout << "File required" << std::endl;
        return 1;
    }

    std::ifstream t(argv[1]);
    std::string const sourceCode { std::istreambuf_iterator<char>(t), {} };

    char const* first = sourceCode.data();
    char const* last = first + sourceCode.size();
#ifdef USE_STATES
    bool ok = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);
#else
    bool ok = lex::tokenize_and_parse(first, last, lexer, parser);
#endif

    std::cout << "Remaining '" << std::string(first,last) << "'" << std::endl;
    std::cout << "R is " << std::boolalpha << ok << std::endl;
}

印刷

<start>
  <try>[def][print_it][(][x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
  <functionList>
    <try>[def][print_it][(][x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
    <param>
      <try>[x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
      <paramPrime>
        <try>[,][y][)][:][print][3][*][x][+][y][return][fed]</try>
        <paramPrime>
          <try>[)][:][print][3][*][x][+][y][return][fed]</try>
          <success>[)][:][print][3][*][x][+][y][return][fed]</success>
          <attributes>[]</attributes>
        </paramPrime>
        <success>[)][:][print][3][*][x][+][y][return][fed]</success>
        <attributes>[]</attributes>
      </paramPrime>
      <success>[)][:][print][3][*][x][+][y][return][fed]</success>
      <attributes>[]</attributes>
    </param>
    <statements>
      <try>[print][3][*][x][+][y][return][fed]</try>
      <statement>
        <try>[print][3][*][x][+][y][return][fed]</try>
        <assignmentStatement>
          <try>[print][3][*][x][+][y][return][fed]</try>
          <fail/>
        </assignmentStatement>
        <printStatement>
          <try>[print][3][*][x][+][y][return][fed]</try>
          <exp>
            <try>[3][*][x][+][y][return][fed]</try>
            <expPrime>
              <try>[*][x][+][y][return][fed]</try>
              <exp>
                <try>[x][+][y][return][fed]</try>
                <expPrime>
                  <try>[+][y][return][fed]</try>
                  <exp>
                    <try>[y][return][fed]</try>
                    <expPrime>
                      <try>[return][fed]</try>
                      <success>[return][fed]</success>
                      <attributes>[]</attributes>
                    </expPrime>
                    <success>[return][fed]</success>
                    <attributes>[]</attributes>
                  </exp>
                  <expPrime>
                    <try>[return][fed]</try>
                    <success>[return][fed]</success>
                    <attributes>[]</attributes>
                  </expPrime>
                  <success>[return][fed]</success>
                  <attributes>[]</attributes>
                </expPrime>
                <success>[return][fed]</success>
                <attributes>[]</attributes>
              </exp>
              <expPrime>
                <try>[return][fed]</try>
                <success>[return][fed]</success>
                <attributes>[]</attributes>
              </expPrime>
              <success>[return][fed]</success>
              <attributes>[]</attributes>
            </expPrime>
            <success>[return][fed]</success>
            <attributes>[]</attributes>
          </exp>
          <success>[return][fed]</success>
          <attributes>[]</attributes>
        </printStatement>
        <success>[return][fed]</success>
        <attributes>[]</attributes>
      </statement>
      <statementsPrime>
        <try>[return][fed]</try>
        <statement>
          <try>[return][fed]</try>
          <assignmentStatement>
            <try>[return][fed]</try>
            <fail/>
          </assignmentStatement>
          <printStatement>
            <try>[return][fed]</try>
            <fail/>
          </printStatement>
          <inputStatement>
            <try>[return][fed]</try>
            <fail/>
          </inputStatement>
          <conditionStatement>
            <try>[return][fed]</try>
            <fail/>
          </conditionStatement>
          <whileStatement>
            <try>[return][fed]</try>
            <fail/>
          </whileStatement>
          <callStatement>
            <try>[return][fed]</try>
            <fail/>
          </callStatement>
          <returnStatement>
            <try>[return][fed]</try>
            <exp>
              <try>[fed]</try>
              <fail/>
            </exp>
            <success>[fed]</success>
            <attributes>[]</attributes>
          </returnStatement>
          <success>[fed]</success>
          <attributes>[]</attributes>
        </statement>
        <statementsPrime>
          <try>[fed]</try>
          <statement>
            <try>[fed]</try>
            <assignmentStatement>
              <try>[fed]</try>
              <fail/>
            </assignmentStatement>
            <printStatement>
              <try>[fed]</try>
              <fail/>
            </printStatement>
            <inputStatement>
              <try>[fed]</try>
              <fail/>
            </inputStatement>
            <conditionStatement>
              <try>[fed]</try>
              <fail/>
            </conditionStatement>
            <whileStatement>
              <try>[fed]</try>
              <fail/>
            </whileStatement>
            <callStatement>
              <try>[fed]</try>
              <fail/>
            </callStatement>
            <returnStatement>
              <try>[fed]</try>
              <fail/>
            </returnStatement>
            <fail/>
          </statement>
          <success>[fed]</success>
          <attributes>[]</attributes>
        </statementsPrime>
        <success>[fed]</success>
        <attributes>[]</attributes>
      </statementsPrime>
      <success>[fed]</success>
      <attributes>[]</attributes>
    </statements>
    <success></success>
    <attributes>[]</attributes>
  </functionList>
  <endList>
    <try></try>
    <end>
      <try></try>
      <callStatement>
        <try></try>
        <fail/>
      </callStatement>
      <printStatement>
        <try></try>
        <fail/>
      </printStatement>
      <fail/>
    </end>
    <fail/>
  </endList>
  <fail/>
</start>
Remaining ''
R is false
于 2018-06-23T00:16:23.010 回答