1

我需要解析(yaml)之类的东西:

- from: src
  to: 
    - target1
    - target2
- from: src2
  to: 
    - target3
    - target4

我尝试了类似下面的写法(简化的伪代码):

  identifierRule = +alnum;
  fromToRule = lit("-") >> 
    ( 
      "from:" >> identifierRule >> qi::eol >>
      (
        ("to: " >> qi::eol >> +(qi::repeat(indention)[qi::blank] >> "-" >> identifierRule >> qi::eol))
    );

但是通过这种方法,第二个 `from` 条目被解析为第一个 `to` 条目的附加条目,而不是作为新的单独条目。有没有办法检索当前缩进级别并将其用作附加规则信息?

4

1 回答 1

0

当然,您应该使用 YAML 库(例如 yaml-cpp),因为 YAML 远比这里展示的更通用,而且……充满了解析上的怪癖。不要自己造轮子。

不过,假设你是想学习 Spirit Qi,那这个问题是有道理的。

这一点都不是微不足道的,而且很大程度上取决于您要解析的内容。只关注显示的输入,我想像这样的 AST:

using Key   = std::string;
using Raw   = std::string;
using Value = boost::make_recursive_variant<  //
    Raw,                                      //
    std::map<Key, boost::recursive_variant_>, //
    std::vector<boost::recursive_variant_>>::type;

using List = std::vector<Value>;
using Dict = std::map<Key, Value>;

所以,"- a\n- b"将是一个列表,"a: b\nc: d"将是一个字典,其他任何东西都是一个原始值。

为了能够嵌套,让我们创建由级别编号参数化的规则:

using Entry = std::pair<Key, Value>;
qi::rule<It, Value()>       start;
qi::rule<It, Value(int)>    value;
qi::rule<It, List(int)>     list;
qi::rule<It, Dict(int)>     dict;
qi::rule<It, Entry(int)>    entry;

qi::rule<It, Key()>     key;
qi::rule<It, Raw()>     rawvalue;
qi::rule<It, void(int)> linebreak_;

key 和 rawvalue 从不包含换行符,因此不需要该参数。linebreak_ 不公开属性,但仍被写成一条规则,这样我们可以为其启用调试输出。

现在,依靠大量经验,我可能会编写如下规则:

using namespace qi;
_r1_type level; // friendly name for inherited attribute
auto nested    = level + 1;

首先,我们可以让它保持“可读性”。马上,一些帮手:

linebreak_     = *blank >> eol >> repeat(level)["  "];
auto linebreak = linebreak_(level);
auto identchar = copy(char_("a-zA-Z0-9_"));

我们用简写来帮助自己,这样就不必重复书写。但是请注意 qi::copy(即 proto::deep_copy,参见例如“将解析器分配给 auto 变量”)的微妙存在。

现在,我们可以非常“天真”地制定规则:

key      = (identchar - digit) >> *identchar;
rawvalue = omit[*blank >> &graph] >> *(char_ - eol);

这里的微妙之处在于:原始值开头数量不定的空白会被省略。现在,让我们继续自上而下地了解与 level 相关的产生式:

start    = value(0);
value    = *linebreak >> (list(level) | dict(level) | rawvalue);

我们从列表开始,因为它的"- "前缀最容易识别:

list     = ("- " >> value(nested)) % linebreak;

记住,nested 只是 Phoenix 表达式 level + 1。

dict     = entry(level) % linebreak;

字典对所有条目保持相同的级别。

entry    = key >> skip(blank)[":"] >> value(nested);

请注意,我们容忍 `:` 周围的空白。

全部组合在一起:

// Qi grammar for a small indentation-sensitive YAML subset; the result is a Value.
// The level-aware rules take an int nesting level as their inherited attribute.
template <typename It> struct Parser : qi::grammar<It, Value()> {
    Parser() : Parser::base_type(start) {
        using namespace qi;
        _r1_type level; // friendly name for inherited attribute

        // Phoenix expression: the level passed to child values.
        auto nested    = level + 1;
        // Optional trailing blanks, end of line, then `level` repetitions of
        // the two-space indent unit.
        linebreak_     = *blank >> eol >> repeat(level)["  "];
        auto linebreak = linebreak_(level);
        // copy() deep-copies the expression so it is safe to hold in `auto`.
        auto identchar = copy(char_("a-zA-Z0-9_"));

        // Identifier that does not start with a digit.
        key      = (identchar - digit) >> *identchar;
        // Drop leading blanks, require a graphic character next (lookahead
        // only), then take everything up to the end of the line.
        rawvalue = omit[*blank >> &graph] >> *(char_ - eol);
        // key, then ':' (blanks before it are skipped), then a value one
        // level deeper.
        entry    = key >> skip(blank)[":"] >> value(nested);
        // All entries of one dictionary share the same level.
        dict     = entry(level) % linebreak;
        // "- " introduces each item; the item's value is one level deeper.
        list     = ("- " >> value(nested)) % linebreak;
        // Consume any line breaks at this level, then try list, dict and
        // raw value in that order.
        value    = *linebreak >> (list(level) | dict(level) | rawvalue);
        // Top-level value starts at level 0.
        start    = value(0);

        // linebreak_ is left out of the debug nodes (commented out below).
        BOOST_SPIRIT_DEBUG_NODES(
            (start)(value)(list)(dict)(entry)(rawvalue)(key)/*(linebreak_)*/)
    }

  private:
    using Entry = std::pair<Key, Value>;
    // Rules parameterized by nesting level (the `int` in the signature).
    qi::rule<It, Value()>    start;
    qi::rule<It, Value(int)> value;
    qi::rule<It, List(int)>  list;
    qi::rule<It, Dict(int)>  dict;
    qi::rule<It, Entry(int)> entry;

    // key and rawvalue take no level; linebreak_ exposes no attribute.
    qi::rule<It, Key()>     key;
    qi::rule<It, Raw()>     rawvalue;
    qi::rule<It, void(int)> linebreak_;
};

添加最少的代码来打印生成的 AST:Live On Compiler Explorer

//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted/std_pair.hpp> // for map attributes
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <fmt/ostream.h>
#include <fmt/ranges.h>
#include <map>
namespace qi = boost::spirit::qi;

// Sample input taken from the question. The grammar consumes exactly two
// spaces per nesting level after each line break, so the `to:` keys sit at
// one level (2 spaces) and their list items at two levels (4 spaces).
auto sample = R"(- from: src
  to: 
    - target1
    - target2
- from: src2
  to: 
    - target3
    - target4)";

// AST: a Value is a raw string, a dictionary keyed by Key, or a list,
// with dictionaries and lists holding further Values recursively.
using Key   = std::string;
using Raw   = std::string;
using Value = boost::make_recursive_variant<  //
    Raw,                                      //
    std::map<Key, boost::recursive_variant_>, //
    std::vector<boost::recursive_variant_>>::type;

// Names for the list and dictionary shapes, used as rule attribute types.
using List = std::vector<Value>;
using Dict = std::map<Key, Value>;

// Visitor that writes a value to an output stream by way of fmt.
struct Printer {
    std::ostream&                  _os;
    std::ostreambuf_iterator<char> _out;

    Printer(std::ostream& os) : _os(os), _out(_os) {}

    // Variant case: dispatch to the overload matching the active alternative.
    template <typename... Ts>
    auto operator()(boost::variant<Ts...> const& v) const {
        boost::apply_visitor(*this, v);
    }

    // Any other type: format it with "{}" and return fmt's output iterator.
    template <typename T>
    auto operator()(T const& v) const {
        return fmt::format_to(_out, "{}", v);
    }
};

// Let fmt format a Value by streaming it (fmt's ostream_formatter).
template <> struct fmt::formatter<Value> : ostream_formatter {};

static inline std::ostream& operator<<(std::ostream& os, Value const& v) {
    Printer{os}(v);
    return os;
}

// Qi grammar for a small indentation-sensitive YAML subset; the result is a Value.
// The level-aware rules take an int nesting level as their inherited attribute.
template <typename It> struct Parser : qi::grammar<It, Value()> {
    Parser() : Parser::base_type(start) {
        using namespace qi;
        _r1_type level; // friendly name for inherited attribute

        // Phoenix expression: the level passed to child values.
        auto nested    = level + 1;
        // Optional trailing blanks, end of line, then `level` repetitions of
        // the two-space indent unit.
        linebreak_     = *blank >> eol >> repeat(level)["  "];
        auto linebreak = linebreak_(level);
        // copy() deep-copies the expression so it is safe to hold in `auto`.
        auto identchar = copy(char_("a-zA-Z0-9_"));

        // Identifier that does not start with a digit.
        key      = (identchar - digit) >> *identchar;
        // Drop leading blanks, require a graphic character next (lookahead
        // only), then take everything up to the end of the line.
        rawvalue = omit[*blank >> &graph] >> *(char_ - eol);
        // key, then ':' (blanks before it are skipped), then a value one
        // level deeper.
        entry    = key >> skip(blank)[":"] >> value(nested);
        // All entries of one dictionary share the same level.
        dict     = entry(level) % linebreak;
        // "- " introduces each item; the item's value is one level deeper.
        list     = ("- " >> value(nested)) % linebreak;
        // Consume any line breaks at this level, then try list, dict and
        // raw value in that order.
        value    = *linebreak >> (list(level) | dict(level) | rawvalue);
        // Top-level value starts at level 0.
        start    = value(0);

        // linebreak_ is left out of the debug nodes (commented out below).
        BOOST_SPIRIT_DEBUG_NODES(
            (start)(value)(list)(dict)(entry)(rawvalue)(key)/*(linebreak_)*/)
    }

private:
    using Entry = std::pair<Key, Value>;
    // Rules parameterized by nesting level (the `int` in the signature).
    qi::rule<It, Value()>    start;
    qi::rule<It, Value(int)> value;
    qi::rule<It, List(int)>  list;
    qi::rule<It, Dict(int)>  dict;
    qi::rule<It, Entry(int)> entry;

    // key and rawvalue take no level; linebreak_ exposes no attribute.
    qi::rule<It, Key()>     key;
    qi::rule<It, Raw()>     rawvalue;
    qi::rule<It, void(int)> linebreak_;
};

int main() {
    for (std::string const input : {sample}) {
        auto f = begin(input), l = end(input);
        Parser<decltype(f)> p;

        if (Value v; parse(f, l, p, v)) {
            fmt::print("Parsed: {}\n", v);
        } else {
            fmt::print("Parsed failed\n");
        }

        if (f != l) {
            fmt::print("Remaining: '{}'\n", std::string(f,l));
        }
    }
}

输出:

Parsed: [{"from": src, "to": [target1, target2]}, {"from": src2, "to": [target3, target4]}]

启用 BOOST_SPIRIT_DEBUG 后的输出:

<start>
  <try>- from: src\n  to: \n </try>
  <value>
    <try>- from: src\n  to: \n </try>
    <list>
      <try>- from: src\n  to: \n </try>
      <value>
        <try>from: src\n  to: \n   </try>
        <list>
          <try>from: src\n  to: \n   </try>
          <fail/>
        </list>
        <dict>
          <try>from: src\n  to: \n   </try>
          <entry>
            <try>from: src\n  to: \n   </try>
            <key>
              <try>from: src\n  to: \n   </try>
              <success>: src\n  to: \n    - t</success>
              <attributes>[[f, r, o, m]]</attributes>
            </key>
            <value>
              <try> src\n  to: \n    - ta</try>
              <list>
                <try> src\n  to: \n    - ta</try>
                <fail/>
              </list>
              <dict>
                <try> src\n  to: \n    - ta</try>
                <entry>
                  <try> src\n  to: \n    - ta</try>
                  <key>
                    <try> src\n  to: \n    - ta</try>
                    <fail/>
                  </key>
                  <fail/>
                </entry>
                <fail/>
              </dict>
              <rawvalue>
                <try> src\n  to: \n    - ta</try>
                <success>\n  to: \n    - target</success>
                <attributes>[[s, r, c]]</attributes>
              </rawvalue>
              <success>\n  to: \n    - target</success>
              <attributes>[[s, r, c], 2]</attributes>
            </value>
            <success>\n  to: \n    - target</success>
            <attributes>[[[f, r, o, m], [s, r, c]], 1]</attributes>
          </entry>
          <entry>
            <try>to: \n    - target1\n </try>
            <key>
              <try>to: \n    - target1\n </try>
              <success>: \n    - target1\n   </success>
              <attributes>[[t, o]]</attributes>
            </key>
            <value>
              <try> \n    - target1\n    </try>
              <list>
                <try>- target1\n    - targ</try>
                <value>
                  <try>target1\n    - target</try>
                  <list>
                    <try>target1\n    - target</try>
                    <fail/>
                  </list>
                  <dict>
                    <try>target1\n    - target</try>
                    <entry>
                      <try>target1\n    - target</try>
                      <key>
                        <try>target1\n    - target</try>
                        <success>\n    - target2\n- fro</success>
                        <attributes>[[t, a, r, g, e, t, 1]]</attributes>
                      </key>
                      <fail/>
                    </entry>
                    <fail/>
                  </dict>
                  <rawvalue>
                    <try>target1\n    - target</try>
                    <success>\n    - target2\n- fro</success>
                    <attributes>[[t, a, r, g, e, t, 1]]</attributes>
                  </rawvalue>
                  <success>\n    - target2\n- fro</success>
                  <attributes>[[t, a, r, g, e, t, 1], 3]</attributes>
                </value>
                <value>
                  <try>target2\n- from: src2</try>
                  <list>
                    <try>target2\n- from: src2</try>
                    <fail/>
                  </list>
                  <dict>
                    <try>target2\n- from: src2</try>
                    <entry>
                      <try>target2\n- from: src2</try>
                      <key>
                        <try>target2\n- from: src2</try>
                        <success>\n- from: src2\n  to: </success>
                        <attributes>[[t, a, r, g, e, t, 2]]</attributes>
                      </key>
                      <fail/>
                    </entry>
                    <fail/>
                  </dict>
                  <rawvalue>
                    <try>target2\n- from: src2</try>
                    <success>\n- from: src2\n  to: </success>
                    <attributes>[[t, a, r, g, e, t, 2]]</attributes>
                  </rawvalue>
                  <success>\n- from: src2\n  to: </success>
                  <attributes>[[t, a, r, g, e, t, 2], 3]</attributes>
                </value>
                <success>\n- from: src2\n  to: </success>
                <attributes>[[[t, a, r, g, e, t, 1], [t, a, r, g, e, t, 2]], 2]</attributes>
              </list>
              <success>\n- from: src2\n  to: </success>
              <attributes>[[[t, a, r, g, e, t, 1], [t, a, r, g, e, t, 2]], 2]</attributes>
            </value>
            <success>\n- from: src2\n  to: </success>
            <attributes>[[[t, o], [[t, a, r, g, e, t, 1], [t, a, r, g, e, t, 2]]], 1]</attributes>
          </entry>
          <success>\n- from: src2\n  to: </success>
          <attributes>[[[[f, r, o, m], [s, r, c]], [[t, o], [[t, a, r, g, e, t, 1], [t, a, r, g, e, t, 2]]]], 1]</attributes>
        </dict>
        <success>\n- from: src2\n  to: </success>
        <attributes>[[[[f, r, o, m], [s, r, c]], [[t, o], [[t, a, r, g, e, t, 1], [t, a, r, g, e, t, 2]]]], 1]</attributes>
      </value>
      <value>
        <try>from: src2\n  to: \n  </try>
        <list>
          <try>from: src2\n  to: \n  </try>
          <fail/>
        </list>
        <dict>
          <try>from: src2\n  to: \n  </try>
          <entry>
            <try>from: src2\n  to: \n  </try>
            <key>
              <try>from: src2\n  to: \n  </try>
              <success>: src2\n  to: \n    - </success>
              <attributes>[[f, r, o, m]]</attributes>
            </key>
            <value>
              <try> src2\n  to: \n    - t</try>
              <list>
                <try> src2\n  to: \n    - t</try>
                <fail/>
              </list>
              <dict>
                <try> src2\n  to: \n    - t</try>
                <entry>
                  <try> src2\n  to: \n    - t</try>
                  <key>
                    <try> src2\n  to: \n    - t</try>
                    <fail/>
                  </key>
                  <fail/>
                </entry>
                <fail/>
              </dict>
              <rawvalue>
                <try> src2\n  to: \n    - t</try>
                <success>\n  to: \n    - target</success>
                <attributes>[[s, r, c, 2]]</attributes>
              </rawvalue>
              <success>\n  to: \n    - target</success>
              <attributes>[[s, r, c, 2], 2]</attributes>
            </value>
            <success>\n  to: \n    - target</success>
            <attributes>[[[f, r, o, m], [s, r, c, 2]], 1]</attributes>
          </entry>
          <entry>
            <try>to: \n    - target3\n </try>
            <key>
              <try>to: \n    - target3\n </try>
              <success>: \n    - target3\n   </success>
              <attributes>[[t, o]]</attributes>
            </key>
            <value>
              <try> \n    - target3\n    </try>
              <list>
                <try>- target3\n    - targ</try>
                <value>
                  <try>target3\n    - target</try>
                  <list>
                    <try>target3\n    - target</try>
                    <fail/>
                  </list>
                  <dict>
                    <try>target3\n    - target</try>
                    <entry>
                      <try>target3\n    - target</try>
                      <key>
                        <try>target3\n    - target</try>
                        <success>\n    - target4</success>
                        <attributes>[[t, a, r, g, e, t, 3]]</attributes>
                      </key>
                      <fail/>
                    </entry>
                    <fail/>
                  </dict>
                  <rawvalue>
                    <try>target3\n    - target</try>
                    <success>\n    - target4</success>
                    <attributes>[[t, a, r, g, e, t, 3]]</attributes>
                  </rawvalue>
                  <success>\n    - target4</success>
                  <attributes>[[t, a, r, g, e, t, 3], 3]</attributes>
                </value>
                <value>
                  <try>target4</try>
                  <list>
                    <try>target4</try>
                    <fail/>
                  </list>
                  <dict>
                    <try>target4</try>
                    <entry>
                      <try>target4</try>
                      <key>
                        <try>target4</try>
                        <success></success>
                        <attributes>[[t, a, r, g, e, t, 4]]</attributes>
                      </key>
                      <fail/>
                    </entry>
                    <fail/>
                  </dict>
                  <rawvalue>
                    <try>target4</try>
                    <success></success>
                    <attributes>[[t, a, r, g, e, t, 4]]</attributes>
                  </rawvalue>
                  <success></success>
                  <attributes>[[t, a, r, g, e, t, 4], 3]</attributes>
                </value>
                <success></success>
                <attributes>[[[t, a, r, g, e, t, 3], [t, a, r, g, e, t, 4]], 2]</attributes>
              </list>
              <success></success>
              <attributes>[[[t, a, r, g, e, t, 3], [t, a, r, g, e, t, 4]], 2]</attributes>
            </value>
            <success></success>
            <attributes>[[[t, o], [[t, a, r, g, e, t, 3], [t, a, r, g, e, t, 4]]], 1]</attributes>
          </entry>
          <success></success>
          <attributes>[[[[f, r, o, m], [s, r, c, 2]], [[t, o], [[t, a, r, g, e, t, 3], [t, a, r, g, e, t, 4]]]], 1]</attributes>
        </dict>
        <success></success>
        <attributes>[[[[f, r, o, m], [s, r, c, 2]], [[t, o], [[t, a, r, g, e, t, 3], [t, a, r, g, e, t, 4]]]], 1]</attributes>
      </value>
      <success></success>
      <attributes>[[[[[f, r, o, m], [s, r, c]], [[t, o], [[t, a, r, g, e, t, 1], [t, a, r, g, e, t, 2]]]], [[[f, r, o, m], [s, r, c, 2]], [[t, o], [[t, a, r, g, e, t, 3], [t, a, r, g, e, t, 4]]]]], 0]</attributes>
    </list>
    <success></success>
    <attributes>[[[[[f, r, o, m], [s, r, c]], [[t, o], [[t, a, r, g, e, t, 1], [t, a, r, g, e, t, 2]]]], [[[f, r, o, m], [s, r, c, 2]], [[t, o], [[t, a, r, g, e, t, 3], [t, a, r, g, e, t, 4]]]]], 0]</attributes>
  </value>
  <success></success>
  <attributes>[[[[[f, r, o, m], [s, r, c]], [[t, o], [[t, a, r, g, e, t, 1], [t, a, r, g, e, t, 2]]]], [[[f, r, o, m], [s, r, c, 2]], [[t, o], [[t, a, r, g, e, t, 3], [t, a, r, g, e, t, 4]]]]]]</attributes>
</start>
Parsed: [{"from": src, "to": [target1, target2]}, {"from": src2, "to": [target3, target4]}]

于 2022-02-15T16:09:26.093 回答