3

这不会编译(下面的代码)。

这里还有另一个问题有同样的错误。但我不明白答案。我已经尝试在某些地方插入 qi::eps ——但没有成功。

我还尝试为所用的类型添加元函数(boost::spirit::traits::is_container)——但这也无济于事。

我还尝试使用相同的变体,其中包含我需要在任何地方使用的所有类型。同样的问题。

有没有人让这个词法分析器工作,返回除了 double 或 int 或 string 以外的东西?并且对于解析器还返回非平凡的对象?

我尝试在所有返回默认对象的地方实现语义函数。但这也无济于事。

代码如下:

// spirit_error.cpp : Defines the entry point for the console application.
//

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/phoenix/object.hpp>
#include <boost/spirit/include/qi_char_class.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>
#include <boost/mpl/index_of.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/intrusive_ptr.hpp>
#include <boost/smart_ptr/intrusive_ref_counter.hpp>

namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;


namespace frank
{
/// Polymorphic base class whose reference count is supplied by
/// boost::intrusive_ref_counter.
class ref_counter : public boost::intrusive_ref_counter<ref_counter>
{
public:
    /// Virtual, so derived objects can be destroyed through a base pointer.
    virtual ~ref_counter() {}
};
/// Reference-counted base of `nature` and `discipline`; also declares the
/// handle types the grammars use as attributes.
class symbol : public ref_counter
{
public:
    /// Intrusive handle to an immutable symbol.
    typedef boost::intrusive_ptr<const symbol> symbolPtr;
    /// Ordered collection of symbol handles.
    typedef std::vector<symbolPtr> symbolVector;

    /// Helper type whose constructor and destructor are both empty.
    /// NOTE(review): the name suggests an RAII scope guard -- confirm intent.
    struct push_scope
    {
        push_scope() {}
        ~push_scope() {}
    };
};
/// Symbol type for a nature; declares the attribute keys and the variant
/// used for attribute values.
class nature : public symbol
{
public:
    /// Keys of the attributes a nature can carry.
    enum enumAttribute
    {
        eAbstol,
        eAccess,
        eDDT,
        eIDT,
        eUnits
    };

    /// Stateless first alternative of attributeValue.
    struct empty
    {
        /// No `empty` ever orders before another one.
        bool operator<(const empty &) const { return false; }

        /// Writes nothing to the stream and returns it unchanged.
        friend std::ostream &operator<<(std::ostream &_rOut, const empty &) { return _rOut; }
    };

    /// Attribute value: either nothing or a string.
    typedef boost::variant<empty, std::string> attributeValue;
};
/// Symbol type for a discipline.
class discipline : public symbol
{
public:
    /// Domain selector with two values, discrete and continuous.
    enum enumDomain
    {
        eDiscrete,
        eContinuous
    };
};

/// Reference-counted type descriptor; used as the token value of the
/// `real` / `integer` / `string` keyword tokens declared by the lexer.
class type : public ref_counter
{
public:
    /// Intrusive handle to a (mutable) type object.
    typedef boost::intrusive_ptr<type> typePtr;
};
/// Iterator over the characters of a std::string that stores a pointer to the
/// string plus an index instead of a character pointer (onInclude edits the
/// string through m_p while lexing).
///
/// The past-the-end position is encoded as the index ~std::size_t(0).
/// Changes compared with the previous revision:
///  - operator- and operator-- translate the end sentinel to size(), so
///    `end - begin` is the string length (it used to be -1) and `--end`
///    addresses the last character;
///  - a begin iterator of an empty string equals its end iterator;
///  - the member typedefs replace the std::iterator base (deprecated in C++17);
///  - the hand-written copy constructor/assignment are gone, the implicitly
///    generated ones perform the same member-wise copy.
struct myIterator
{   typedef std::random_access_iterator_tag iterator_category;
    typedef char value_type;
    typedef std::ptrdiff_t difference_type;
    typedef const char *pointer;
    typedef const char &reference;

    std::string *m_p;       ///< string being traversed (nullptr when default constructed)
    std::size_t m_iPos;     ///< current index, or endPos() for past-the-end

    /// Index value that encodes the past-the-end position.
    static std::size_t endPos(void)
    {   return ~std::size_t(0);
    }
    /// Current position as a real offset: the sentinel maps to size()
    /// (or to 0 when no string is attached).
    std::size_t index(void) const
    {   if (m_iPos != endPos())
            return m_iPos;
        return m_p ? m_p->size() : 0;
    }
    /// Creates a detached past-the-end iterator.
    myIterator(void)
        :m_p(nullptr),
        m_iPos(endPos())
    {
    }
    /// Creates the begin (default) or end (_bEnd == true) iterator of _r.
    /// For an empty string both are the end iterator.
    myIterator(std::string &_r, const bool _bEnd = false)
        :m_p(&_r),
        m_iPos(_bEnd || _r.empty() ? endPos() : 0)
    {
    }
    /// Returns the current character; std::string::at throws
    /// std::out_of_range when the iterator is past the end.
    const char &operator*(void) const
    {   return m_p->at(m_iPos);
    }
    bool operator==(const myIterator &_r) const
    {   return m_p == _r.m_p && m_iPos == _r.m_iPos;
    }
    bool operator!=(const myIterator &_r) const
    {   return !(*this == _r);
    }
    /// Advances by one character and switches to the sentinel on reaching
    /// (or, after the string shrank, passing) the end.
    myIterator &operator++(void)
    {   ++m_iPos;
        if (m_iPos >= m_p->size())
            m_iPos = endPos();
        return *this;
    }
    myIterator operator++(int)
    {   const myIterator s(*this);
        operator++();
        return s;
    }
    /// Steps back by one character; from past-the-end this yields the last
    /// character of the string.
    myIterator &operator--(void)
    {   if (m_iPos == endPos())
            m_iPos = m_p->size();
        --m_iPos;
        return *this;
    }
    myIterator operator--(int)
    {   const myIterator s(*this);
        operator--();
        return s;
    }
    /// Orders by position within one string (the sentinel is the largest
    /// index) and by string address otherwise.
    bool operator<(const myIterator &_r) const
    {   if (m_p == _r.m_p)
            return m_iPos < _r.m_iPos;
        else
            return m_p < _r.m_p;
    }
    /// Signed distance in characters from _r to *this.
    std::ptrdiff_t operator-(const myIterator &_r) const
    {   return static_cast<std::ptrdiff_t>(index()) - static_cast<std::ptrdiff_t>(_r.index());
    }
};
/// Lexer action bound to the string token of the "INCLUDE" state: replaces the
/// matched text inside the string being lexed and tells the lexer to ignore
/// the match.
struct onInclude
{
    auto operator()(myIterator &_rStart, myIterator &_rEnd) const
    {
        std::string &rText = *_rStart.m_p;
        const std::size_t iFrom = _rStart.m_iPos;

        // Remove the matched text (the include statement) ...
        rText.erase(iFrom, _rEnd.m_iPos - iFrom);
        // ... and put the replacement at the same offset. The original comment
        // calls this "the contents of the file"; a fixed literal is inserted.
        rText.insert(iFrom, "abcd");

        // Collapse the match so that it ends where it started.
        _rEnd = _rStart;
        return lex::pass_flags::pass_ignore;
    }
};
/// Token definitions for the nature/discipline/module language.
///
/// Three lexer states are populated by the constructor:
///  - the default state (`this->self`) with punctuation, keywords, literals
///    and identifiers,
///  - "INCLUDE", entered after the `include token, where a string token
///    triggers onInclude,
///  - "WS" with whitespace and comments (main() uses it as the skipper state).
///
/// NOTE(review): the mem-initializer list below is not written in member
/// declaration order; members are nevertheless initialized in declaration
/// order.
template<typename LEXER>
class lexer:public lex::lexer<LEXER>
{   public:
    // type keywords; their token value is a type::typePtr
    lex::token_def<type::typePtr> m_sKW_real, m_sKW_integer, m_sKW_string;
    // comments and whitespace carry no value
    lex::token_def<lex::omit> m_sLineComment, m_sCComment;
    lex::token_def<lex::omit> m_sWS;
    // punctuation and the include directive carry no value
    lex::token_def<lex::omit> m_sSemicolon, m_sEqual, m_sColon, m_sInclude, m_sCharOP, m_sCharCP,
        m_sComma;
    // tokens with a value
    lex::token_def<std::string> m_sIdentifier, m_sString;
    lex::token_def<double> m_sReal;
    lex::token_def<int> m_sInteger;
    // remaining keywords carry no value
    lex::token_def<lex::omit> m_sKW_units, m_sKW_access, m_sKW_idt_nature, m_sKW_ddt_nature, m_sKW_abstol,
        m_sKW_nature, m_sKW_endnature, m_sKW_continuous, m_sKW_discrete,
        m_sKW_potential, m_sKW_flow, m_sKW_domain, m_sKW_discipline, m_sKW_enddiscipline, m_sKW_module,
        m_sKW_endmodule, m_sKW_parameter;
    //typedef const type *typePtr;
    /// Semantic-action helper: converts the matched character range to T
    /// with boost::lexical_cast.
    template<typename T>
    struct extractValue
    {   T operator()(const myIterator &_rStart, const myIterator &_rEnd) const
        {   return boost::lexical_cast<T>(std::string(_rStart, _rEnd));
        }
    };
    /// Semantic-action helper: returns the matched text without its first
    /// and last character (the surrounding quotes of m_sString).
    struct extractString
    {   std::string operator()(const myIterator &_rStart, const myIterator &_rEnd) const
        {   const auto s = std::string(_rStart, _rEnd);
            return s.substr(1, s.size() - 2);
        }
    };
    /// Builds every token pattern and registers the tokens with their states.
    lexer(void)
        :m_sWS("[ \\t\\n\\r]+"),
        m_sKW_parameter("\"parameter\""),
        m_sKW_real("\"real\""),
        m_sKW_integer("\"integer\""),
        m_sKW_string("\"string\""),
        m_sLineComment("\\/\\/[^\\n]*"),
        m_sCComment("\\/\\*"
            "("
                "[^*]"
                    "|" "[\\n]"
                    "|" "([*][^/])"
            ")*"
            "\\*\\/"),
        m_sSemicolon("\";\""),
        m_sEqual("\"=\""),
        m_sColon("\":\""),
        m_sCharOP("\"(\""), 
        m_sCharCP("\")\""),
        m_sComma("\",\""),
        m_sIdentifier("[a-zA-Z_]+[a-zA-Z0-9_]*"),
        m_sString("[\\\"]"
            //"("
            //  "(\\[\"])"
            //  "|"
                //"[^\"]"
            //")*"
            "[^\\\"]*"
            "[\\\"]"),
        m_sKW_units("\"units\""),
        m_sKW_access("\"access\""),
        m_sKW_idt_nature("\"idt_nature\""),
        m_sKW_ddt_nature("\"ddt_nature\""),
        m_sKW_abstol("\"abstol\""),
        m_sKW_nature("\"nature\""),
        m_sKW_endnature("\"endnature\""),
        m_sKW_continuous("\"continuous\""),
        m_sKW_discrete("\"discrete\""),
        m_sKW_domain("\"domain\""),
        m_sKW_discipline("\"discipline\""),
        m_sKW_enddiscipline("\"enddiscipline\""),
        m_sKW_potential("\"potential\""),
        m_sKW_flow("\"flow\""),
//realnumber      ({uint}{exponent})|((({uint}\.{uint})|(\.{uint})){exponent}?)
//exponent        [Ee][+-]?{uint}
//uint            [0-9][_0-9]*

        m_sReal("({uint}{exponent})"
            "|"
                "("
                    "(({uint}[\\.]{uint})|([\\.]{uint})){exponent}?"
                ")"
        ),
        m_sInteger("{uint}"),
        m_sInclude("\"`include\""),
        m_sKW_module("\"module\""),
        m_sKW_endmodule("\"endmodule\"")
    {   // named sub-patterns referenced as {uint} / {exponent} by m_sReal and m_sInteger
        this->self.add_pattern
            ("uint", "[0-9]+")
            ("exponent", "[eE][\\+\\-]?{uint}");
        // Default state. All keywords are listed before m_sIdentifier and
        // m_sReal before m_sInteger -- presumably so the more specific token
        // wins when several patterns match; TODO confirm against the
        // Spirit.Lex matching rules.
        this->self = m_sSemicolon
            | m_sEqual
            | m_sColon
            | m_sCharOP
            | m_sCharCP
            | m_sComma
            | m_sString[lex::_val = boost::phoenix::bind(extractString(), lex::_start, lex::_end)]
            | m_sKW_real//[lex::_val = boost::phoenix::bind(&type::getReal)]
            | m_sKW_integer//[lex::_val = boost::phoenix::bind(&type::getInteger)]
            | m_sKW_string//[lex::_val = boost::phoenix::bind(&type::getString)]
            | m_sKW_parameter
            | m_sKW_units
            | m_sKW_access
            | m_sKW_idt_nature
            | m_sKW_ddt_nature
            | m_sKW_abstol
            | m_sKW_nature
            | m_sKW_endnature
            | m_sKW_continuous
            | m_sKW_discrete
            | m_sKW_domain
            | m_sKW_discipline
            | m_sKW_enddiscipline
            | m_sReal[lex::_val = boost::phoenix::bind(extractValue<double>(), lex::_start, lex::_end)]
            | m_sInteger[lex::_val = boost::phoenix::bind(extractValue<int>(), lex::_start, lex::_end)]
            | m_sKW_potential
            | m_sKW_flow
            | m_sKW_module
            | m_sKW_endmodule
            | m_sIdentifier
            | m_sInclude [ lex::_state = "INCLUDE" ]
            ;
        // "INCLUDE" state: the string after `include switches back to the
        // initial state and hands the match to onInclude.
        this->self("INCLUDE") += m_sString [
            lex::_state = "INITIAL", lex::_pass = boost::phoenix::bind(onInclude(), lex::_start, lex::_end)
        ];
        // "WS" state: whitespace plus both comment forms.
        this->self("WS") = m_sWS
            | m_sLineComment
            | m_sCComment
            ;
    }
};
/// Grammar for a nature declaration:
///   nature <identifier> [: <identifier>] ; { <property> } endnature
/// The declared start attribute is symbol::symbolPtr. No semantic actions are
/// attached, so the rules rely on Spirit's automatic attribute propagation.
/// NOTE(review): the page's author reports that this program does not
/// compile, and the answer points at the polymorphic/pointer attribute types.
template<typename Iterator, typename Lexer>
class natureParser:public qi::grammar<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> >
{   qi::rule<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> > m_sStart;
    // one "<keyword> = <value> ;" line, declared as a (key, value) pair
    qi::rule<Iterator, std::pair<nature::enumAttribute, nature::attributeValue>(void), qi::in_state_skipper<Lexer> > m_sProperty;
    // the optional ": <identifier>" part after the nature's own name
    qi::rule<Iterator, std::string(), qi::in_state_skipper<Lexer> > m_sName;
    public:
    /// Builds the rules from the token definitions in _rTokens.
    template<typename Tokens>
    natureParser(const Tokens &_rTokens)
        :natureParser::base_type(m_sStart)
    {   // one alternative per attribute keyword; only the value token differs
        m_sProperty = (_rTokens.m_sKW_units
                >> _rTokens.m_sEqual
                >> _rTokens.m_sString
                >> _rTokens.m_sSemicolon
                )
            | (_rTokens.m_sKW_access
                >> _rTokens.m_sEqual
                >> _rTokens.m_sIdentifier
                >> _rTokens.m_sSemicolon
                )
            | (_rTokens.m_sKW_idt_nature
                >> _rTokens.m_sEqual
                >> _rTokens.m_sIdentifier
                >> _rTokens.m_sSemicolon
                )
            | (_rTokens.m_sKW_ddt_nature
                >> _rTokens.m_sEqual
                >> _rTokens.m_sIdentifier
                >> _rTokens.m_sSemicolon
                )
            | (_rTokens.m_sKW_abstol
                >> _rTokens.m_sEqual
                >> _rTokens.m_sReal
                >> _rTokens.m_sSemicolon
                )
            ;
        m_sName = (_rTokens.m_sColon >> _rTokens.m_sIdentifier);
        m_sStart = (_rTokens.m_sKW_nature
            >> _rTokens.m_sIdentifier
            >> -m_sName
            >> _rTokens.m_sSemicolon
            >> *(m_sProperty)
            >> _rTokens.m_sKW_endnature
            );
        // rule names (used by Spirit's debug/error reporting)
        m_sStart.name("start");
        m_sProperty.name("property");
    }
};
/*
// Conservative discipline
discipline electrical; 
  potential    Voltage;
  flow         Current;
enddiscipline
*/
// a parser for a discipline declaration
/// Grammar for a discipline declaration:
///   discipline <identifier> ; { <property> } enddiscipline
/// where a property is a `potential`, `flow` or `domain` line.
/// The declared start attribute is symbol::symbolPtr; no semantic actions are
/// attached.
template<typename Iterator, typename Lexer>
class disciplineParser:public qi::grammar<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> >
{   qi::rule<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> > m_sStart;
    // NOTE(review): the bool presumably distinguishes potential from flow -- confirm
    typedef std::pair<bool, boost::intrusive_ptr<const nature> > CPotentialAndNature;
    /// Stateless first alternative of `property`.
    struct empty
    {   bool operator<(const empty&) const
        {   return false;
        }
        friend std::ostream &operator<<(std::ostream &_r, const empty&)
        {   return _r;
        }
    };
    /// Attribute of one property line.
    typedef boost::variant<empty, CPotentialAndNature, discipline::enumDomain> property;
    qi::rule<Iterator, discipline::enumDomain(), qi::in_state_skipper<Lexer> > m_sDomain;
    qi::rule<Iterator, property(void), qi::in_state_skipper<Lexer> > m_sProperty;
    public:
    /// Builds the rules from the token definitions in _rTokens.
    template<typename Tokens>
    disciplineParser(const Tokens &_rTokens)
        :disciplineParser::base_type(m_sStart)
    {   // both keyword tokens are lex::omit tokens, i.e. they carry no value
        m_sDomain = _rTokens.m_sKW_continuous
            | _rTokens.m_sKW_discrete
            ;
        m_sProperty = (_rTokens.m_sKW_potential >> _rTokens.m_sIdentifier >> _rTokens.m_sSemicolon)
            | (_rTokens.m_sKW_flow >> _rTokens.m_sIdentifier >> _rTokens.m_sSemicolon)
            | (_rTokens.m_sKW_domain >> m_sDomain >> _rTokens.m_sSemicolon)
            ;
        m_sStart = (_rTokens.m_sKW_discipline
            >> _rTokens.m_sIdentifier
            >> _rTokens.m_sSemicolon
            >> *m_sProperty
            >> _rTokens.m_sKW_enddiscipline
        );
    }
};
/// Grammar for a module declaration:
///   module <identifier> ( <port> { , <port> } ) { parameter <type> <identifier> } endmodule
/// The declared start attribute is symbol::symbolPtr; no semantic actions are
/// attached.
template<typename Iterator, typename Lexer>
class moduleParser:public qi::grammar<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> >
{   public:
    qi::rule<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> > m_sStart;
    // parenthesised port list and its comma-separated contents
    qi::rule<Iterator, symbol::symbolVector(void), qi::in_state_skipper<Lexer> > m_sModulePortList;
    qi::rule<Iterator, symbol::symbolVector(void), qi::in_state_skipper<Lexer> > m_sPortList;
    qi::rule<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> > m_sPort;
    // the `module` keyword
    qi::rule<Iterator, std::shared_ptr<symbol::push_scope>(void), qi::in_state_skipper<Lexer> > m_sModule;

    typedef boost::intrusive_ptr<const ref_counter> intrusivePtr;
    typedef std::vector<intrusivePtr> vectorOfPtr;
    // module body: currently only parameter declarations
    qi::rule<Iterator, vectorOfPtr(void), qi::in_state_skipper<Lexer> > m_sModuleItemList;
    qi::rule<Iterator, intrusivePtr(void), qi::in_state_skipper<Lexer> > m_sParameter;
    qi::rule<Iterator, intrusivePtr(void), qi::in_state_skipper<Lexer> > m_sModuleItem;
    qi::rule<Iterator, type::typePtr(void), qi::in_state_skipper<Lexer> > m_sType;

    /// Builds the rules from the token definitions in _rTokens.
    template<typename Tokens>
    moduleParser(const Tokens &_rTokens)
        :moduleParser::base_type(m_sStart)
    {   m_sPort = _rTokens.m_sIdentifier;
        m_sPortList %= m_sPort % _rTokens.m_sComma;
        m_sModulePortList %= _rTokens.m_sCharOP >> m_sPortList >> _rTokens.m_sCharCP;
        m_sModule = _rTokens.m_sKW_module;
        m_sType = _rTokens.m_sKW_real | _rTokens.m_sKW_integer | _rTokens.m_sKW_string;
        m_sParameter = _rTokens.m_sKW_parameter
            >> m_sType
            >> _rTokens.m_sIdentifier
        ;
        m_sModuleItem = m_sParameter;
        m_sModuleItemList %= *m_sModuleItem;
        m_sStart = (m_sModule
                >> _rTokens.m_sIdentifier
                >> m_sModulePortList
                >> m_sModuleItemList
                >> _rTokens.m_sKW_endmodule);
    }
};
/// Top-level grammar: a file is any number of discipline, nature or module
/// declarations. The declared attribute is a symbol::symbolVector.
template<typename Iterator, typename Lexer>
class fileParser:public qi::grammar<Iterator, symbol::symbolVector(void), qi::in_state_skipper<Lexer> >
{   public:
    disciplineParser<Iterator, Lexer> m_sDiscipline;
    natureParser<Iterator, Lexer> m_sNature;
    moduleParser<Iterator, Lexer> m_sModule;
    qi::rule<Iterator, symbol::symbolVector(void), qi::in_state_skipper<Lexer> > m_sStart;
    qi::rule<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> > m_sItem;
    //public:
    /// Builds the sub-grammars and the top-level rules from the token
    /// definitions in _rTokens.
    template<typename Tokens>
    fileParser(const Tokens &_rTokens)
        :fileParser::base_type(m_sStart),
        // Listed in declaration order, which is the order members are
        // actually initialized in (the list used to name m_sNature first).
        m_sDiscipline(_rTokens),
        m_sNature(_rTokens),
        m_sModule(_rTokens)
    {   m_sItem = m_sDiscipline | m_sNature | m_sModule;
        m_sStart = *m_sItem;
    }
};
}
/// Lexes and parses a fixed sample input (three natures and one discipline).
/// @return 0 when the parse succeeds and consumes all tokens, 1 otherwise
///         (the result used to be computed and then discarded).
int main()
{   std::string sInput = "\
nature Current;\n\
  units        = \"A\";\n\
  access       = I;\n\
  idt_nature   = Charge;\n\
  abstol       = 1e-12;\n\
endnature\n\
\n\
// Charge in coulombs\n\
nature Charge;\n\
  units      = \"coul\";\n\
  access     = Q;\n\
  ddt_nature = Current;\n\
  abstol     = 1e-14;\n\
endnature\n\
\n\
// Potential in volts\n\
nature Voltage;\n\
  units      = \"V\";\n\
  access     = V;\n\
  idt_nature = Flux;\n\
  abstol     = 1e-6;\n\
endnature\n\
\n\
discipline electrical;\n\
  potential    Voltage;\n\
  flow         Current;\n\
enddiscipline\n\
";
    // token type: iterates with frank::myIterator and can carry any of the listed value types
    typedef lex::lexertl::token<frank::myIterator, boost::mpl::vector<frank::type::typePtr, std::string, double, int> > token_type;
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    typedef frank::lexer<lexer_type>::iterator_type iterator_type;
    typedef frank::fileParser<iterator_type, frank::lexer<lexer_type>::lexer_def> grammar_type;

    frank::lexer<lexer_type> sLexer;
    grammar_type sParser(sLexer);
    frank::symbol::push_scope sPush;
    auto pStringBegin = frank::myIterator(sInput);
    auto pBegin(sLexer.begin(pStringBegin, frank::myIterator(sInput, true)));
    // tokens of the lexer's "WS" state (whitespace and comments) are skipped
    const auto b = qi::phrase_parse(pBegin, sLexer.end(), sParser, qi::in_state("WS")[sLexer.self]);
    // report failure, or a parse that stopped before the last token, to the caller
    return (b && pBegin == sLexer.end()) ? 0 : 1;
}
4

2 回答 2

1

有没有人让这个词法分析器工作,返回除了 double 或 int 或 string 以外的东西?

当然。可以在此站点上找到简单的示例

并且对于解析器还返回非平凡的对象?

这是你真正的问题。Spirit 非常适合在 eDSL 中轻松表达的解析器子集,并且具有“神奇地”映射到选择的属性的巨大好处。

一些现实是:

  • 属性应该具有价值语义;使用多态属性是困难的(如何使用 boost::spirit::qi 解析器的多态属性?,例如)

  • 使用 Lex 会让 Spirit 的大部分优势消失,因为所有“高级”解析器(如 real_parser、[u]int_parser)都无法再使用。Spirit 的开发者曾公开表示他们不喜欢使用 Lex。此外,Spirit X3 已不再支持 Lex。


背景资料:

我非常考虑将源原样解析为直接值类型的 AST 节点。我知道,这可能是您认为的“微不足道的对象”,但不要被明显的简单性所欺骗:递归变体树具有一定的表达能力。

例子

在那之后,我编写了代码,将该 AST 转换为领域表示:其中具有完全正确的所有权关系、按词法作用域级联的节点/边属性以及交叉引用。如果您有兴趣,我刚刚整理了这份代码并放到了 GitHub 上,主要是因为那个任务在许多方面都与此非常相似,例如属性的覆盖/继承,以及在作用域内解析标识符: https://github.com/sehe/spirit-graphviz/blob/master/spirit-graphviz.cpp#L660

建议,想法

在你的情况下,我会采取类似的方法来保持简单。目前展示的代码(尚未)涵盖最棘手的部分(例如在 discipline 内覆盖 nature 的属性)。

一旦您开始实施用例,例如解决给定节点的兼容学科和绝对容差,您就需要一个具有完全保真度的域模型。最好不要丢失源信息和不可变的 AST 信息²。

作为中间立场,您可能可以避免在内存中构建整个源 AST,只是为了一次性对其进行转换,在顶层您可以拥有:

file = qi::skip(skipper) [
        *(m_sDiscipline | m_sNature | m_sModule) [process_ast(_1)]
    ];

在哪里process_ast将“微不足道”的 AST 表示应用到域类型中,一次一个。这样你就只保留了少量的临时 AST 表示。

域表示可以任意复杂以支持您的所有逻辑和用例。

让我们“展示,不要告诉”

烘焙最简单的与语法相匹配的 AST:

namespace frank { namespace ast {
    struct nature {
        struct empty{};

        std::string name;
        std::string inherits;

        enum class Attribute { units, access, idt, ddt, abstol };
        using Value = boost::variant<int, double, std::string>;
        std::map<Attribute, Value> attributes;
    };

    struct discipline {
        enum enumDomain { eUnspecified, eDiscrete, eContinuous };
        struct properties_t {
            enumDomain domain = eUnspecified;
            boost::optional<std::string> flow, potential;
        };

        std::string name;
        properties_t properties;
    };

    // TODO
    using module = qi::unused_type;
    using file   = std::vector<boost::variant<nature, discipline, module> >;

    enum class type { real, integer, string };
} }

这是微不足道的,并将 1:1 映射到语法产生式上,这意味着我们的阻抗非常小。

词法单元(Token)?我们不需要 Lex

您可以拥有通用的标记解析器,而不需要 Lex 的复杂性

是的,Lex(尤其是静态生成的)可以潜在地提高性能,但是

  • 如果你真的需要那样的性能,我敢打赌 Spirit Qi 本来就不是你的最佳选择
  • 过早优化...

我做了什么:

struct tokens {
    // implicit lexemes
    qi::rule<It, std::string()> string, identifier;
    qi::rule<It, double()> real;
    qi::rule<It, int()> integer;
    qi::rule<It, ast::nature::Value()> value;
    qi::rule<It, ast::nature::Attribute()> attribute;
    qi::rule<It, ast::discipline::enumDomain()> domain;

    struct attribute_sym_t : qi::symbols<char, ast::nature::Attribute> {
        attribute_sym_t() {
            this->add
               ("units", ast::nature::Attribute::units)
               ("access", ast::nature::Attribute::access)
               ("idt_nature", ast::nature::Attribute::idt)
               ("ddt_nature", ast::nature::Attribute::ddt)
               ("abstol", ast::nature::Attribute::abstol);
        }
    } attribute_sym;

    struct domain_sym_t : qi::symbols<char, ast::discipline::enumDomain> {
        domain_sym_t() {
            this->add
               ("discrete", ast::discipline::eDiscrete)
               ("continuous", ast::discipline::eContinuous);
        }
    } domain_sym;

    tokens() {
        using namespace qi;
        auto kw = qr::distinct(copy(char_("a-zA-Z0-9_")));

        string     = '"' >> *("\\" >> char_ | ~char_('"')) >> '"';
        identifier = char_("a-zA-Z_") >> *char_("a-zA-Z0-9_");
        real       = double_;
        integer    = int_;
        attribute  = kw[attribute_sym];
        domain     = kw[domain_sym];

        value = string | identifier | real | integer;

        BOOST_SPIRIT_DEBUG_NODES((string)(identifier)(real)(integer)(value)(domain)(attribute))
    }
};

这样是不是轻松多了?请注意:

  • 所有属性都会自动传播
  • 字符串处理转义(这一位已在您的 Lex 方法中注释掉)。我们甚至不需要语义操作来(严重)撬出未引用/未转义的值
  • 我们使用 distinct 来确保关键字解析只匹配完整的标识符。(参见“如何在 Boost Spirit 中正确解析保留字”。)

    这实际上是您注意到缺少单独词法分析器的地方。

    另一方面,这使得上下文相关的关键字变得轻而易举(lex 可以轻松地将关键字优先于出现在关键字不能出现的地方的标识符。⁴)

跳过空格/评论怎么办?

我们本可以添加一个令牌,但出于约定的原因,我将其设为解析器:

struct skipParser : qi::grammar<It> {
    skipParser() : skipParser::base_type(spaceOrComment) {
        using namespace qi;
        spaceOrComment = space
            | ("//" >> *(char_ - eol) >> (eoi|eol))
            | ("/*" >> *(char_ - "*/") >> "*/");

        BOOST_SPIRIT_DEBUG_NODES((spaceOrComment))
    }
  private:
    qi::rule<It> spaceOrComment;
};

natureParser

我们的 AST 解析器继承自 tokens:

struct natureParser : tokens, qi::grammar<It, ast::nature(), skipParser> {

从那里开始一帆风顺:

property = attribute >> '=' >> value >> ';';

nature
    = kw["nature"] >> identifier >> -(':' >> identifier) >> ';'
    >> *property
    >> kw["endnature"];

disciplineParser

discipline = kw["discipline"] >> identifier >> ';' 
    >> properties 
    >> kw["enddiscipline"]
    ;

properties
    = kw["domain"] >> domain >> ';'
    ^ kw["flow"] >> identifier >> ';'
    ^ kw["potential"] >> identifier >> ';'
    ;

这显示了一种竞争方法,该方法使用置换运算符 ( ^) 以任何顺序将可选替代项解析为固定frank::ast::discipline属性结构。当然,您可能会选择在这里使用更通用的表示,就像我们使用ast::nature.

模块 AST 留给读者作为练习,尽管解析器规则在下面实现。

顶层:封装跳过器(skipper)

我不喜欢必须在调用代码中指定跳过器(skipper):这比实际需要的更复杂,而且更换跳过器会改变语法的含义。所以,我把它封装在顶层解析器中:

struct fileParser : qi::grammar<It, ast::file()> {
    fileParser() : fileParser::base_type(file) {
        file = qi::skip(qi::copy(m_sSkip)) [
                *(m_sDiscipline | m_sNature | m_sModule)
            ];

        BOOST_SPIRIT_DEBUG_NODES((file))
    }
  private:
    disciplineParser m_sDiscipline;
    natureParser     m_sNature;
    moduleParser     m_sModule;
    skipParser       m_sSkip;

    qi::rule<It, ast::file()> file;
};

演示时间

此演示添加operator<<了枚举和变体访问者,用于打印一些 AST 详细信息以用于调试/演示目的 ( print_em)。

然后我们有一个测试驱动程序:

int main() {
    using iterator_type = std::string::const_iterator;

    iterator_type iter = sInput.begin(), last = sInput.end();

    frank::Parsers<iterator_type>::fileParser parser;
    print_em print;

    frank::ast::file file;
    bool ok = qi::parse(iter, last, parser, file);

    if (ok) {
        for (auto& symbol : file)
            print(symbol);
    }
    else {
        std::cout << "Parse failed\n";
    }

    if (iter != last) {
        std::cout << "Remaining unparsed: '" << std::string(iter,last) << "'\n";
    }
}

通过您问题的示例输入,我们得到以下输出:

Live On Coliru

-- Nature
name: Current
inherits: 
attribute: units = A
attribute: access = I
attribute: idt = Charge
attribute: abstol = 1e-12
-- Nature
name: Charge
inherits: 
attribute: units = coul
attribute: access = Q
attribute: ddt = Current
attribute: abstol = 1e-14
-- Nature
name: Voltage
inherits: 
attribute: units = V
attribute: access = V
attribute: idt = Flux
attribute: abstol = 1e-06
-- Discipline
name: electrical
domain: (unspecified)
flow:  Current
potential:  Voltage
Remaining unparsed: '
'

通过BOOST_SPIRIT_DEBUG定义,您可以获得丰富的调试信息:Live On Coliru

完整列表

Live On Coliru

//#define BOOST_SPIRIT_DEBUG
#include <map>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapted.hpp>
#include <boost/spirit/repository/include/qi_distinct.hpp>

namespace qi = boost::spirit::qi;

namespace frank {
namespace ast {

    /// AST node for one nature declaration.
    struct nature {
        struct empty {};

        std::string name;      ///< identifier after the `nature` keyword
        std::string inherits;  ///< identifier after ':'; empty when absent

        /// Keys of the attribute lines a nature can contain.
        enum class Attribute { units, access, idt, ddt, abstol };

        /// Value of an attribute line.
        using Value = boost::variant<int, double, std::string>;

        /// Attribute lines, keyed by attribute kind.
        std::map<Attribute, Value> attributes;
    };

    /// AST node for one discipline declaration.
    struct discipline {
        enum enumDomain { eUnspecified, eDiscrete, eContinuous };

        /// The optional `domain` / `flow` / `potential` lines.
        struct properties_t {
            enumDomain domain = eUnspecified;
            boost::optional<std::string> flow;
            boost::optional<std::string> potential;
        };

        std::string name;         ///< identifier after the `discipline` keyword
        properties_t properties;
    };

    // TODO
    using module = qi::unused_type;

    /// A parsed file: its declarations in source order.
    using file = std::vector<boost::variant<nature, discipline, module> >;

    enum class type { real, integer, string };

} // namespace ast
} // namespace frank

// Adapt the AST structs as Fusion sequences. The members are listed in the
// same order in which the corresponding grammar rules produce their parts
// (name, optional base, attributes / name, properties / domain, flow, potential).
BOOST_FUSION_ADAPT_STRUCT(frank::ast::nature, name, inherits, attributes)
BOOST_FUSION_ADAPT_STRUCT(frank::ast::discipline, name, properties)
BOOST_FUSION_ADAPT_STRUCT(frank::ast::discipline::properties_t, domain, flow, potential)

namespace frank {
    namespace qr = boost::spirit::repository::qi;

    template <typename It> struct Parsers {

        /// Token-level rules shared by the grammars below (each grammar
        /// inherits from this struct). They take the place of a separate lexer.
        struct tokens {
            // implicit lexemes (the rules are declared without a skipper type)
            qi::rule<It, std::string()> string, identifier;
            qi::rule<It, double()> real;
            qi::rule<It, int()> integer;
            qi::rule<It, ast::nature::Value()> value;
            qi::rule<It, ast::nature::Attribute()> attribute;
            qi::rule<It, ast::discipline::enumDomain()> domain;

            /// Symbol table: attribute keyword -> ast::nature::Attribute.
            struct attribute_sym_t : qi::symbols<char, ast::nature::Attribute> {
                attribute_sym_t() {
                    this->add
                       ("units", ast::nature::Attribute::units)
                       ("access", ast::nature::Attribute::access)
                       ("idt_nature", ast::nature::Attribute::idt)
                       ("ddt_nature", ast::nature::Attribute::ddt)
                       ("abstol", ast::nature::Attribute::abstol);
                }
            } attribute_sym;

            /// Symbol table: domain keyword -> ast::discipline::enumDomain.
            struct domain_sym_t : qi::symbols<char, ast::discipline::enumDomain> {
                domain_sym_t() {
                    this->add
                       ("discrete", ast::discipline::eDiscrete)
                       ("continuous", ast::discipline::eContinuous);
                }
            } domain_sym;

            /// Defines all token rules.
            tokens() {
                using namespace qi;
                // kw[p]: p must not be directly followed by an identifier
                // character, so keywords only match as complete words
                auto kw = qr::distinct(copy(char_("a-zA-Z0-9_")));

                // quoted string; a backslash takes the following character literally
                string     = '"' >> *("\\" >> char_ | ~char_('"')) >> '"';
                identifier = char_("a-zA-Z_") >> *char_("a-zA-Z0-9_");
                real       = double_;
                integer    = int_;
                attribute  = kw[attribute_sym];
                domain     = kw[domain_sym];

                // NOTE(review): `real` (double_) is tried before `integer`, so an
                // integer literal is presumably consumed as a double -- confirm
                // whether the int alternative is ever selected.
                value = string | identifier | real | integer;

                BOOST_SPIRIT_DEBUG_NODES((string)(identifier)(real)(integer)(value)(domain)(attribute))
            }
        };

        /// Skipper grammar: matches one whitespace character, one `//` line
        /// comment or one `/* ... */` block comment.
        struct skipParser : qi::grammar<It> {
            skipParser() : skipParser::base_type(spaceOrComment) {
                using namespace qi;
                spaceOrComment = space
                    // line comment: up to the end of the line or of the input
                    | ("//" >> *(char_ - eol) >> (eoi|eol))
                    // block comment: everything up to the first "*/"
                    | ("/*" >> *(char_ - "*/") >> "*/");

                BOOST_SPIRIT_DEBUG_NODES((spaceOrComment))
            }
          private:
            qi::rule<It> spaceOrComment;
        };

        /// Grammar for a nature declaration:
        ///   nature <identifier> [: <identifier>] ; { <attribute> = <value> ; } endnature
        /// Synthesizes an ast::nature.
        struct natureParser : tokens, qi::grammar<It, ast::nature(), skipParser> {
            natureParser() : natureParser::base_type(nature) {
                using namespace qi;
                // keyword directive: the literal must not be followed by an identifier character
                auto kw = qr::distinct(copy(char_("a-zA-Z0-9_")));

                // one attribute line, synthesized as a (key, value) pair
                property = attribute >> '=' >> value >> ';';

                nature
                    = kw["nature"] >> identifier >> -(':' >> identifier) >> ';'
                    >> *property
                    >> kw["endnature"];

                BOOST_SPIRIT_DEBUG_NODES((nature)(property))
            }
          private:
            using Attribute = std::pair<ast::nature::Attribute, ast::nature::Value>;

            qi::rule<It, ast::nature(), skipParser> nature;
            qi::rule<It, Attribute(), skipParser> property;

            // make the inherited token rules visible by their unqualified names
            using tokens::attribute;
            using tokens::value;
            using tokens::identifier;
        };

        /// Grammar for a discipline declaration:
        ///   discipline <identifier> ; <properties> enddiscipline
        /// Synthesizes an ast::discipline.
        struct disciplineParser : tokens, qi::grammar<It, ast::discipline(), skipParser> {
            disciplineParser() : disciplineParser::base_type(discipline) {

                // keyword directive: the literal must not be followed by an identifier character
                auto kw = qr::distinct(qi::copy(qi::char_("a-zA-Z0-9_")));

                discipline = kw["discipline"] >> identifier >> ';' 
                    >> properties 
                    >> kw["enddiscipline"]
                    ;

                // permutation operator (^): the three optional lines may appear
                // in any order and fill the fixed properties_t members
                properties
                    = kw["domain"] >> domain >> ';'
                    ^ kw["flow"] >> identifier >> ';'
                    ^ kw["potential"] >> identifier >> ';'
                    ;

                BOOST_SPIRIT_DEBUG_NODES((discipline)(properties))
            }
          private:
            qi::rule<It, ast::discipline(), skipParser> discipline;
            qi::rule<It, ast::discipline::properties_t(), skipParser> properties;

            // make the inherited token rules visible by their unqualified names
            using tokens::domain;
            using tokens::identifier;
        };

        /// Grammar for a module declaration:
        ///   module <identifier> ( <port> { , <port> } ) { parameter <type> <identifier> } endmodule
        /// ast::module is an alias of qi::unused_type and every rule here is
        /// declared without an attribute, so the input is only recognized.
        struct moduleParser : tokens, qi::grammar<It, ast::module(), skipParser> {
            moduleParser() : moduleParser::base_type(module) {
                // keyword directive: the literal must not be followed by an identifier character
                auto kw = qr::distinct(qi::copy(qi::char_("a-zA-Z0-9_")));

                m_sPort           = identifier;
                m_sPortList       = m_sPort % ',';
                m_sModulePortList = '(' >> m_sPortList >> ')';
                m_sModule         = kw["module"];
                m_sType           = kw["real"] | kw["integer"] | kw["string"];
                m_sParameter      = kw["parameter"] >> m_sType >> identifier;
                m_sModuleItem     = m_sParameter;
                m_sModuleItemList = *m_sModuleItem;
                module =
                    (m_sModule >> identifier >> m_sModulePortList >> m_sModuleItemList >> kw["endmodule"]);
            }
          private:
            qi::rule<It, ast::module(), skipParser> module;
            // port list
            qi::rule<It, skipParser> m_sModulePortList;
            qi::rule<It, skipParser> m_sPortList;
            qi::rule<It, skipParser> m_sPort;
            qi::rule<It, skipParser> m_sModule;

            // module body
            qi::rule<It, skipParser> m_sModuleItemList;
            qi::rule<It, skipParser> m_sParameter;
            qi::rule<It, skipParser> m_sModuleItem;
            qi::rule<It, skipParser> m_sType;

            using tokens::identifier;
        };

        /// Top-level grammar: any number of discipline, nature or module
        /// declarations, synthesized as an ast::file. The grammar is declared
        /// without a skipper; skipping is applied internally through
        /// qi::skip, so callers use plain qi::parse.
        struct fileParser : qi::grammar<It, ast::file()> {
            fileParser() : fileParser::base_type(file) {
                file = qi::skip(qi::copy(m_sSkip)) [
                        *(m_sDiscipline | m_sNature | m_sModule)
                    ];

                BOOST_SPIRIT_DEBUG_NODES((file))
            }
          private:
            // sub-grammars and the skipper; as members they are constructed
            // before the constructor body above uses them
            disciplineParser m_sDiscipline;
            natureParser     m_sNature;
            moduleParser     m_sModule;
            skipParser       m_sSkip;

            qi::rule<It, ast::file()> file;
        };
    };

}

extern std::string const sInput;

// just for demo
#include <boost/optional/optional_io.hpp>

namespace frank { namespace ast {
    //static inline std::ostream &operator<<(std::ostream &os, const nature::empty &) { return os; }

    /// Streams the short name of a nature attribute key ("?" for a value
    /// outside the enumeration).
    static inline std::ostream &operator<<(std::ostream &os, nature::Attribute a) {
        const char *label = "?";
        switch (a) {
            case nature::Attribute::units:  label = "units";  break;
            case nature::Attribute::access: label = "access"; break;
            case nature::Attribute::idt:    label = "idt";    break;
            case nature::Attribute::ddt:    label = "ddt";    break;
            case nature::Attribute::abstol: label = "abstol"; break;
        }
        return os << label;
    }

    /// Streams the name of a discipline domain ("?" for a value outside the
    /// enumeration).
    static inline std::ostream &operator<<(std::ostream &os, discipline::enumDomain d) {
        const char *label = "?";
        switch (d) {
            case discipline::eDiscrete:    label = "discrete";      break;
            case discipline::eContinuous:  label = "continuous";    break;
            case discipline::eUnspecified: label = "(unspecified)"; break;
        }
        return os << label;
    }
} }

struct print_em {
    using result_type = void;
    template <typename V>
    void operator()(V const& variant) const {
        boost::apply_visitor(*this, variant);
    }
    void operator()(frank::ast::nature const& nature) const {
        std::cout << "-- Nature\n";
        std::cout << "name: " << nature.name << "\n";
        std::cout << "inherits: " << nature.inherits << "\n";
        for (auto& a : nature.attributes) {
            std::cout << "attribute: " << a.first << " = " << a.second << "\n";
        }
    }
    void operator()(frank::ast::discipline const& discipline) const {
        std::cout << "-- Discipline\n";
        std::cout << "name: " << discipline.name << "\n";
        std::cout << "domain: " << discipline.properties.domain << "\n";
        std::cout << "flow: " << discipline.properties.flow << "\n";
        std::cout << "potential: " << discipline.properties.potential << "\n";
    }
    void operator()(frank::ast::module const&) const {
        std::cout << "-- Module (TODO)\n";
    }
};

int main() {
    using iterator_type = std::string::const_iterator;

    iterator_type iter = sInput.begin(), last = sInput.end();

    frank::Parsers<iterator_type>::fileParser parser;
    print_em print;

    frank::ast::file file;
    bool ok = parse(iter, last, parser, file);

    if (ok) {
        for (auto& symbol : file)
            print(symbol);
    }
    else {
        std::cout << "Parse failed\n";
    }

    if (iter != last) {
        std::cout << "Remaining unparsed: '" << std::string(iter,last) << "'\n";
    }
}

std::string const sInput = R"(
nature Current;
  units        = "A";
  access       = I;
  idt_nature   = Charge;
  abstol       = 1e-12;
endnature

// Charge in coulombs
nature Charge;
  units      = "coul";
  access     = Q;
  ddt_nature = Current;
  abstol     = 1e-14;
endnature

// Potential in volts
nature Voltage;
  units      = "V";
  access     = V;
  idt_nature = Flux;
  abstol     = 1e-6;
endnature

discipline electrical;
  potential    Voltage;
  flow         Current;
enddiscipline
)";

¹顺便说一句,那里的另一个答案同样展示了多态属性与 Spirit 之间的“阻抗不匹配”——这次是在 Karma(生成器)一侧

²(以防止取决于评估顺序或类似情况的细微错误,例如)

³(从这里收集一些但没有导入太多的复杂性,这在您的 Lex 方法中没有反映出来)

⁴(事实上,这是你需要在语法中进行状态切换的地方,这是一个众所周知的不发达且在 Spirit Lex 中几乎无法使用的领域:例如,当它工作时如何避免定义与 boost::spirit::lex 中的所有内容匹配的标记或者当它变得很糟糕时:Boost.Spirit SQL 语法/词法分析器失败

于 2017-11-10T22:19:16.043 回答
0

一种解决方案是在任何地方使用 std::string 并定义一个 boost::variant 所需的一切,但不要直接在解析器或词法分析器中的任何地方使用它,而只是将其序列化和反序列化到字符串中/从字符串中反序列化。

这就是 boost::spirit 的创始人的意图吗?

于 2017-11-10T18:36:49.190 回答