c++ - 为什么 qi::skip 使用来自词法分析器的标记失败？

Question

我正在使用 boost::spirit lex 和 qi 来解析一些源代码。

我已经让词法分析器跳过了输入字符串中的空白。我现在想做的是：根据解析器中的上下文，切换是否跳过注释。

下面是一个基本的演示。我的问题请参阅 Grammar::Grammar() 中的注释：

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix.hpp>

#include <cstring>
#include <iostream>

// Short aliases for the Boost.Spirit / Boost.Phoenix namespaces used below.
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;

// Token type produced by the lexer: it iterates over `char const*` input and
// lists std::string as its only attribute type.
// NOTE(review): the third argument is presumably the HasState flag (false =
// no lexer-state support) -- confirm against the Boost.Spirit.Lex token docs.
typedef lex::lexertl::token<char const*, boost::mpl::vector<std::string>, boost::mpl::false_ > token_type;
// Lexer engine instantiated for token_type.
// NOTE(review): actor_lexer is presumably required because Lexer attaches
// semantic actions to some token definitions -- confirm.
typedef lex::lexertl::actor_lexer<token_type> lexer_type;

// Explicit token ids shared between the lexer and the grammar.
struct TokenId
{
   enum type
   {
      // Starts at lex::min_token_id so that the ids defined here begin at the
      // value Spirit.Lex publishes under that name.
      INVALID_TOKEN_ID = lex::min_token_id,
      // Id given to the comment token definition when it is registered.
      COMMENT = INVALID_TOKEN_ID + 1
   };
};

// Lexer for the demo. Horizontal whitespace and line breaks are matched but
// flagged with pass_ignore; comments, identifiers and ';' are registered as
// tokens for the parser.
struct Lexer : lex::lexer<lexer_type>
{
   lex::token_def<std::string> comment;
   lex::token_def<std::string> identifier;
   lex::token_def<std::string> lineFeed;
   lex::token_def<std::string> space;

   Lexer()
   {
      // Patterns for the matches that are flagged pass_ignore below.
      space = "[\\x20\\t\\f\\v]+";
      lineFeed = "(\\r\\n)|\\r|\\n";

      // Patterns for the tokens that are handed on to the parser.
      comment = "\\/\\*.*?\\*\\/|\\/\\/[^\\r\\n]*";
      identifier = "[A-Za-z_][A-Za-z0-9_]*";

      // Both whitespace kinds carry a semantic action that sets _pass to
      // pass_ignore.
      this->self
         =  space[lex::_pass = lex::pass_flags::pass_ignore]
         |  lineFeed[lex::_pass = lex::pass_flags::pass_ignore]
         ;

      // Registered in the same order as before. Only the comment token gets an
      // explicit id, which the grammar uses via qi::token(TokenId::COMMENT).
      this->self.add(comment, TokenId::COMMENT);
      this->self.add(identifier);
      this->self.add(';');
   }
};

// Iterator type exposed by the lexer; the grammar and all of its rules are
// instantiated over it.
typedef Lexer::iterator_type Iterator;

// Writes one comment's text to stdout as an indented "  comment: ..." line
// and flushes the stream.
void traceComment(const std::string& content)
{
   std::cout << "  comment: ";
   std::cout << content;
   std::cout << std::endl;
}

// Grammar for the demo: a sequence of stand-alone comments (which are traced)
// and variable declarations of the form `identifier identifier ;`.
//
// This class is the reproduction of the problem being asked about: the active
// rule definitions do NOT skip comments inside m_variable, while the
// alternative kept in the comment block at the end of the constructor does.
class Grammar : public qi::grammar<Iterator>
{
   // Used as the skipper template argument of m_variable.
   // NOTE(review): this is the lexer's token type, not a parser type, so it is
   // doubtful that it names a usable skipper -- confirm against the Qi docs.
   typedef token_type skipped_t;

   // Start rule: no attribute, no locals, no skipper.
   qi::rule<Iterator, qi::unused_type, qi::unused_type> m_start;
   // Variable declaration rule; declared with skipped_t as its skipper type.
   qi::rule<Iterator, qi::unused_type, qi::unused_type, skipped_t> m_variable;
   // Matches one comment token and exposes it as a std::string attribute.
   qi::rule<Iterator, std::string(), qi::unused_type> m_comment;

public:
   // The lexer whose token definitions the rules below refer to; also passed
   // to tokenize_and_parse by the caller.
   Lexer lx;

public:
   Grammar() :
      Grammar::base_type(m_start)
   {
// This does not work (comments are not skipped in m_variable):
// the skip directive is applied around the m_variable rule as a whole.
      m_start = *(
            m_comment[phx::bind(&traceComment, qi::_1)]
         |  qi::skip(qi::token(TokenId::COMMENT))[m_variable]
         );

      m_variable = lx.identifier >> lx.identifier >> ';';
      m_comment = qi::token(TokenId::COMMENT);
/** But this works (the skip directive is applied directly to the sequence
    inside m_variable instead of around the rule):
      m_start = *(
         m_comment[phx::bind(&traceComment, qi::_1)]
         | m_variable
         );

      m_variable = qi::skip(qi::token(TokenId::COMMENT))[lx.identifier >> lx.identifier >> ';'];
      m_comment = qi::token(TokenId::COMMENT);
*/
   }
};

void test(const char* code)
{
   std::cout << code << std::endl;

   Grammar parser;
   const char* begin = code;
   const char* end = code + strlen(code);
   tokenize_and_parse(begin, end, parser.lx, parser);

   if (begin == end)
      std::cout << "-- OK --" << std::endl;
   else
      std::cout << "-- FAILED --" << std::endl;
   std::cout << std::endl;
}

// Feeds the four demo inputs to test() in a fixed order. The text inside each
// sample says whether that comment is expected to be kept (traced) or ignored.
int main(int argc, char* argv[])
{
   const char* const samples[] = {
      "/* kept */ int foo;",
      "int /* ignored */ foo;",
      "int foo /* ignored */;",
      "int foo; // kept"
   };

   for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); ++i)
      test(samples[i]);
}

输出是：

/* kept */ int foo;
  comment: /* kept */
-- OK --

int /* ignored */ foo;
-- FAILED --

int foo /* ignored */;
-- FAILED --

int foo; // kept
  comment: // kept
-- OK --

skipped_t 有什么问题吗？

score 2 · Accepted Answer

根据我的经验，您所描述的行为正是我所预期的。

当你写

my_rule = qi::skip(ws) [ foo >> lit(',') >> bar >> lit('=') >> baz ];

这与下面的写法基本相同

my_rule = *ws >> foo >> *ws >> lit(',') >> *ws >> bar >> *ws >> lit('=') >> *ws >> baz;

（假设 ws 是一个没有属性的规则。如果它在您的语法中带有属性，则该属性会被忽略，就像使用了 qi::omit 一样。）

值得注意的是，跳过器（skipper）不会传播到 foo 规则的内部。所以上面的 foo、bar 和 baz 仍然可以对空白敏感。skip 指令的作用是让语法不关心此规则中的前导空白，以及此规则中 ',' 和 '=' 周围的空白。

更多信息在这里：http://boost-spirit.com/home/2010/02/24/parsing-skippers-and-skipping-parsers/

编辑：

另外，我认为 skipped_t 并没有起到您以为它在那里所起的作用。

当您使用自定义跳过器时，最直接的方式是把一个实际的解析器实例指定为该规则的跳过解析器。当您使用类型而不是对象时（例如 qi::skip(qi::blank_type)），这是一种简写：标记类型 qi::blank_type 已通过先前的模板声明与 qi::blank 关联起来，qi 知道在某些位置看到 qi::blank_type 时，应该实例化一个 qi::blank 解析器对象。

我没有看到任何证据表明您实际上已经建立了这套机制，您只是把 skipped_t 定义（typedef）为 token_type。如果您希望它以这种方式工作（我不确定这是否可行），您应该去阅读有关 qi 自定义点（customization points）的内容，然后把 qi::skipped_t 声明为一个空结构体，并通过一些模板样板代码把它关联到规则 m_comment，这大概才是您真正想要跳过的内容。（如果跳过所有类型的所有标记，就不可能匹配到任何东西，那样没有意义，所以我不确定您把 token_type 用作跳过器的意图是什么。）

我的猜测是，当 qi 在模板参数列表中看到 typedef token_type 时，它要么忽略了它，要么把它解释为规则返回值的一部分之类的东西，我不确定它具体会怎么做。

c++ - 为什么 qi::skip 使用来自词法分析器的标记失败？

1 回答 1

Related

Reference