当然,您应该使用现成的 YAML 库(例如 yaml-cpp),因为 YAML 远比这里展示的通用,而且……充满了各种解析上的怪癖。不要自己造轮子。
不过,假设您是想借此学习 Boost Spirit,那么这个问题是有意义的。
这绝非易事,而且在很大程度上取决于您究竟要解析什么。只针对问题中给出的输入,我会设想这样一个 AST:
// AST for the YAML subset handled in this article.
// Key is the text of a mapping key; Raw is the text of an unparsed scalar.
using Key = std::string;
using Raw = std::string;
// Value is a recursive variant holding one of: a Raw scalar, a map from Key
// to nested values, or a vector of nested values.
// boost::recursive_variant_ marks the places where the variant refers to itself.
using Value = boost::make_recursive_variant< //
Raw, //
std::map<Key, boost::recursive_variant_>, //
std::vector<boost::recursive_variant_>>::type;
// Convenience names for the sequence ("- a\n- b") and mapping ("a: b\nc: d") shapes.
using List = std::vector<Value>;
using Dict = std::map<Key, Value>;
所以,"- a\n- b" 将是一个列表,"a: b\nc: d" 将是一个字典,其他任何内容都是一个原始值。
为了能够嵌套,让我们创建由级别编号参数化的规则:
// Attribute produced by `entry`: one key/value pair of a Dict.
using Entry = std::pair<Key, Value>;
// Rule signatures read Synthesized(Inherited...): every rule declared with
// `(int)` receives the current nesting level as its inherited attribute.
qi::rule<It, Value()> start; // entry point, takes no level
qi::rule<It, Value(int)> value; // any value at a given level
qi::rule<It, List(int)> list; // sequence of "- " items
qi::rule<It, Dict(int)> dict; // sequence of key/value entries
qi::rule<It, Entry(int)> entry; // a single key/value entry
qi::rule<It, Key()> key; // never spans lines, so it needs no level
qi::rule<It, Raw()> rawvalue; // never spans lines, so it needs no level
qi::rule<It, void(int)> linebreak_; // exposes no attribute; a rule only so it can be debugged
只有 key 和 rawvalue 永远不会包含换行符,因此它们不需要该参数。linebreak_ 不公开任何属性,但仍然被写成一条规则,这样我们就可以为它启用调试输出。
现在,依靠大量经验,我可能会编写如下规则:
// Bring the Qi parser primitives and placeholders into scope for the rule definitions.
using namespace qi;
_r1_type level; // friendly name for inherited attribute
// Lazy expression meaning "current level plus one"; it is passed to rules
// that parse content nested one level deeper.
auto nested = level + 1;
首先,让我们尽量保持“可读性”。先来几个辅助定义:
// A line break that continues a block at the current nesting level:
// optional trailing blanks, an end-of-line, then `level` indentation units.
// One indentation unit is TWO spaces; the literal below must contain exactly
// two space characters (a one-space unit cannot match two-space nesting).
linebreak_ = *blank >> eol >> repeat(level)["  "];
// Shorthand: linebreak_ invoked with the level of the rule that uses it.
auto linebreak = linebreak_(level);
// qi::copy stores a deep copy of the parser expression, so the `auto`
// variable does not keep references to expired temporaries.
auto identchar = copy(char_("a-zA-Z0-9_"));
我们用这些简写来帮助自己,从而不必重复书写。
但请注意其中 qi::copy(即 proto::deep_copy,
参见例如“将解析器赋值给 auto 变量”)这一微妙之处。
现在,我们可以非常“天真”地制定规则:
// A key is an identifier: one identchar that is not a digit, followed by any
// number of identchars.
key = (identchar - digit) >> *identchar;
// A raw value: leading blanks are matched but not stored (omit), &graph is a
// look-ahead requiring a visible character without consuming it, and then
// every character up to the end of the line is collected.
rawvalue = omit[*blank >> &graph] >> *(char_ - eol);
这里唯一不太直观的地方,是原始值开头那段数量不定的空白会被省略(omit)。现在,让我们自上而下地来看那些感知 level 的产生式:
// Parsing starts with a value at nesting level 0.
start = value(0);
// A value may be preceded by any number of line breaks at the current level.
// The alternatives are tried in order: list, then dict, then raw scalar.
value = *linebreak >> (list(level) | dict(level) | rawvalue);
我们从列表开始,因为它的"- "
前缀最容易识别:
// One or more "- "-prefixed items separated by line breaks at this level;
// the content of each item is parsed one level deeper.
list = ("- " >> value(nested)) % linebreak;
请记住,nested 只是 Phoenix 表达式 level + 1 的简写。
// One or more entries separated by line breaks; every entry is parsed at the
// same level as the dict itself.
dict = entry(level) % linebreak;
字典对所有条目保持相同的级别。
// key, then ':' (skip(blank) lets blanks precede the colon), then the
// entry's value parsed one level deeper.
entry = key >> skip(blank)[":"] >> value(nested);
请注意,借助 skip(blank),我们容忍 ":" 前面出现空白。
把所有内容整合在一起:
/// Qi grammar for a small, indentation-based YAML subset.
///
/// Produces a Value: a list of "- " items, a dict of "key: value" entries, or
/// a raw scalar holding the rest of a line. Nesting is tracked by passing the
/// current level to the rules as an inherited attribute; each level is
/// indented by two spaces.
///
/// @tparam It iterator type of the input being parsed.
template <typename It> struct Parser : qi::grammar<It, Value()> {
    Parser() : Parser::base_type(start) {
        using namespace qi;
        _r1_type level; // friendly name for inherited attribute
        auto nested = level + 1; // lazy "current level plus one"

        // Line break continuing a block at `level`: optional trailing blanks,
        // end-of-line, then `level` indentation units. One unit is TWO spaces;
        // the literal must contain exactly two space characters.
        linebreak_ = *blank >> eol >> repeat(level)["  "];
        auto linebreak = linebreak_(level);

        // qi::copy deep-copies the expression so the auto variable holds no
        // references to expired temporaries.
        auto identchar = copy(char_("a-zA-Z0-9_"));

        // Identifier that does not start with a digit.
        key = (identchar - digit) >> *identchar;
        // Leading blanks are dropped, a visible character must follow, then
        // everything up to the end of the line is the value.
        rawvalue = omit[*blank >> &graph] >> *(char_ - eol);

        // Blanks are tolerated before ':'; the entry's value is one level deeper.
        entry = key >> skip(blank)[":"] >> value(nested);
        // All entries of a dict share the dict's level.
        dict = entry(level) % linebreak;
        // Each list item's content is one level deeper than the list.
        list = ("- " >> value(nested)) % linebreak;
        // Alternatives are tried in order: list, dict, raw scalar.
        value = *linebreak >> (list(level) | dict(level) | rawvalue);
        start = value(0);

        // linebreak_ is left out of the debug nodes (commented out in the list).
        BOOST_SPIRIT_DEBUG_NODES(
            (start)(value)(list)(dict)(entry)(rawvalue)(key)/*(linebreak_)*/)
    }

  private:
    using Entry = std::pair<Key, Value>;

    // Signatures read Synthesized(Inherited...); `int` is the nesting level.
    qi::rule<It, Value()> start;
    qi::rule<It, Value(int)> value;
    qi::rule<It, List(int)> list;
    qi::rule<It, Dict(int)> dict;
    qi::rule<It, Entry(int)> entry;
    qi::rule<It, Key()> key;
    qi::rule<It, Raw()> rawvalue;
    qi::rule<It, void(int)> linebreak_; // no attribute; a rule so it can be debugged
};
再添加最少量的代码来打印生成的 AST(Live On Compiler Explorer):
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted/std_pair.hpp> // for map attributes
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <fmt/ostream.h>
#include <fmt/ranges.h>
#include <map>
namespace qi = boost::spirit::qi;
// Sample document. Written as adjacent literals with explicit "\n" so that
// the significant whitespace stays visible and survives reformatting:
//  * nesting is two spaces per level, matching repeat(level)["  "] in Parser;
//  * the blank after "to:" is intentional; linebreak_ consumes it via *blank.
auto sample = "- from: src\n"
              "  to: \n"
              "    - target1\n"
              "    - target2\n"
              "- from: src2\n"
              "  to: \n"
              "    - target3\n"
              "    - target4";

// AST: Key is the text of a mapping key, Raw the text of an unparsed scalar.
using Key = std::string;
using Raw = std::string;
// Recursive variant: a Raw scalar, a map from Key to nested values, or a
// vector of nested values (boost::recursive_variant_ marks the recursion).
using Value = boost::make_recursive_variant< //
    Raw,                                      //
    std::map<Key, boost::recursive_variant_>, //
    std::vector<boost::recursive_variant_>>::type;
using List = std::vector<Value>;
using Dict = std::map<Key, Value>;

/// Visitor that writes a Value to an output stream using fmt.
struct Printer {
    std::ostream& _os;
    std::ostreambuf_iterator<char> _out{_os}; // declared after _os, which it is built from

    Printer(std::ostream& os) : _os(os) {}

    // Variant overload: dispatches to the overload below for the active alternative.
    template <typename... Ts>
    auto operator()(boost::variant<Ts...> const& v) const { boost::apply_visitor(*this, v); }

    // Alternative overload: formats the string, map or vector with fmt.
    auto operator()(auto const& v) const { return fmt::format_to(_out, "{}", v); }
};

// Lets fmt format a Value through the operator<< defined below.
template <> struct fmt::formatter<Value> : ostream_formatter {};

/// Streams a Value by applying Printer to it.
static inline std::ostream& operator<<(std::ostream& os, Value const& v) {
    Printer{os}(v);
    return os;
}

/// Qi grammar for a small, indentation-based YAML subset.
///
/// Produces a Value: a list of "- " items, a dict of "key: value" entries, or
/// a raw scalar holding the rest of a line. Nesting is tracked by passing the
/// current level to the rules as an inherited attribute; each level is
/// indented by two spaces.
///
/// @tparam It iterator type of the input being parsed.
template <typename It> struct Parser : qi::grammar<It, Value()> {
    Parser() : Parser::base_type(start) {
        using namespace qi;
        _r1_type level; // friendly name for inherited attribute
        auto nested = level + 1; // lazy "current level plus one"

        // Line break continuing a block at `level`: optional trailing blanks,
        // end-of-line, then `level` indentation units. One unit is TWO spaces;
        // the literal must contain exactly two space characters.
        linebreak_ = *blank >> eol >> repeat(level)["  "];
        auto linebreak = linebreak_(level);

        // qi::copy deep-copies the expression so the auto variable holds no
        // references to expired temporaries.
        auto identchar = copy(char_("a-zA-Z0-9_"));

        // Identifier that does not start with a digit.
        key = (identchar - digit) >> *identchar;
        // Leading blanks are dropped, a visible character must follow, then
        // everything up to the end of the line is the value.
        rawvalue = omit[*blank >> &graph] >> *(char_ - eol);

        // Blanks are tolerated before ':'; the entry's value is one level deeper.
        entry = key >> skip(blank)[":"] >> value(nested);
        // All entries of a dict share the dict's level.
        dict = entry(level) % linebreak;
        // Each list item's content is one level deeper than the list.
        list = ("- " >> value(nested)) % linebreak;
        // Alternatives are tried in order: list, dict, raw scalar.
        value = *linebreak >> (list(level) | dict(level) | rawvalue);
        start = value(0);

        // linebreak_ is left out of the debug nodes (commented out in the list).
        BOOST_SPIRIT_DEBUG_NODES(
            (start)(value)(list)(dict)(entry)(rawvalue)(key)/*(linebreak_)*/)
    }

  private:
    using Entry = std::pair<Key, Value>;

    // Signatures read Synthesized(Inherited...); `int` is the nesting level.
    qi::rule<It, Value()> start;
    qi::rule<It, Value(int)> value;
    qi::rule<It, List(int)> list;
    qi::rule<It, Dict(int)> dict;
    qi::rule<It, Entry(int)> entry;
    qi::rule<It, Key()> key;
    qi::rule<It, Raw()> rawvalue;
    qi::rule<It, void(int)> linebreak_; // no attribute; a rule so it can be debugged
};
int main() {
for (std::string const input : {sample}) {
auto f = begin(input), l = end(input);
Parser<decltype(f)> p;
if (Value v; parse(f, l, p, v)) {
fmt::print("Parsed: {}\n", v);
} else {
fmt::print("Parsed failed\n");
}
if (f != l) {
fmt::print("Remaining: '{}'\n", std::string(f,l));
}
}
}
输出:
Parsed: [{"from": src, "to": [target1, target2]}, {"from": src2, "to": [target3, target4]}]
启用 BOOST_SPIRIT_DEBUG 后的输出:
<start>
<try>- from: src\n to: \n </try>
<value>
<try>- from: src\n to: \n </try>
<list>
<try>- from: src\n to: \n </try>
<value>
<try>from: src\n to: \n </try>
<list>
<try>from: src\n to: \n </try>
<fail/>
</list>
<dict>
<try>from: src\n to: \n </try>
<entry>
<try>from: src\n to: \n </try>
<key>
<try>from: src\n to: \n </try>
<success>: src\n to: \n - t</success>
<attributes>[[f, r, o, m]]</attributes>
</key>
<value>
<try> src\n to: \n - ta</try>
<list>
<try> src\n to: \n - ta</try>
<fail/>
</list>
<dict>
<try> src\n to: \n - ta</try>
<entry>
<try> src\n to: \n - ta</try>
<key>
<try> src\n to: \n - ta</try>
<fail/>
</key>
<fail/>
</entry>
<fail/>
</dict>
<rawvalue>
<try> src\n to: \n - ta</try>
<success>\n to: \n - target</success>
<attributes>[[s, r, c]]</attributes>
</rawvalue>
<success>\n to: \n - target</success>
<attributes>[[s, r, c], 2]</attributes>
</value>
<success>\n to: \n - target</success>
<attributes>[[[f, r, o, m], [s, r, c]], 1]</attributes>
</entry>
<entry>
<try>to: \n - target1\n </try>
<key>
<try>to: \n - target1\n </try>
<success>: \n - target1\n </success>
<attributes>[[t, o]]</attributes>
</key>
<value>
<try> \n - target1\n </try>
<list>
<try>- target1\n - targ</try>
<value>
<try>target1\n - target</try>
<list>
<try>target1\n - target</try>
<fail/>
</list>
<dict>
<try>target1\n - target</try>
<entry>
<try>target1\n - target</try>
<key>
<try>target1\n - target</try>
<success>\n - target2\n- fro</success>
<attributes>[[t, a, r, g, e, t, 1]]</attributes>
</key>
<fail/>
</entry>
<fail/>
</dict>
<rawvalue>
<try>target1\n - target</try>
<success>\n - target2\n- fro</success>
<attributes>[[t, a, r, g, e, t, 1]]</attributes>
</rawvalue>
<success>\n - target2\n- fro</success>
<attributes>[[t, a, r, g, e, t, 1], 3]</attributes>
</value>
<value>
<try>target2\n- from: src2</try>
<list>
<try>target2\n- from: src2</try>
<fail/>
</list>
<dict>
<try>target2\n- from: src2</try>
<entry>
<try>target2\n- from: src2</try>
<key>
<try>target2\n- from: src2</try>
<success>\n- from: src2\n to: </success>
<attributes>[[t, a, r, g, e, t, 2]]</attributes>
</key>
<fail/>
</entry>
<fail/>
</dict>
<rawvalue>
<try>target2\n- from: src2</try>
<success>\n- from: src2\n to: </success>
<attributes>[[t, a, r, g, e, t, 2]]</attributes>
</rawvalue>
<success>\n- from: src2\n to: </success>
<attributes>[[t, a, r, g, e, t, 2], 3]</attributes>
</value>
<success>\n- from: src2\n to: </success>
<attributes>[[[t, a, r, g, e, t, 1], [t, a, r, g, e, t, 2]], 2]</attributes>
</list>
<success>\n- from: src2\n to: </success>
<attributes>[[[t, a, r, g, e, t, 1], [t, a, r, g, e, t, 2]], 2]</attributes>
</value>
<success>\n- from: src2\n to: </success>
<attributes>[[[t, o], [[t, a, r, g, e, t, 1], [t, a, r, g, e, t, 2]]], 1]</attributes>
</entry>
<success>\n- from: src2\n to: </success>
<attributes>[[[[f, r, o, m], [s, r, c]], [[t, o], [[t, a, r, g, e, t, 1], [t, a, r, g, e, t, 2]]]], 1]</attributes>
</dict>
<success>\n- from: src2\n to: </success>
<attributes>[[[[f, r, o, m], [s, r, c]], [[t, o], [[t, a, r, g, e, t, 1], [t, a, r, g, e, t, 2]]]], 1]</attributes>
</value>
<value>
<try>from: src2\n to: \n </try>
<list>
<try>from: src2\n to: \n </try>
<fail/>
</list>
<dict>
<try>from: src2\n to: \n </try>
<entry>
<try>from: src2\n to: \n </try>
<key>
<try>from: src2\n to: \n </try>
<success>: src2\n to: \n - </success>
<attributes>[[f, r, o, m]]</attributes>
</key>
<value>
<try> src2\n to: \n - t</try>
<list>
<try> src2\n to: \n - t</try>
<fail/>
</list>
<dict>
<try> src2\n to: \n - t</try>
<entry>
<try> src2\n to: \n - t</try>
<key>
<try> src2\n to: \n - t</try>
<fail/>
</key>
<fail/>
</entry>
<fail/>
</dict>
<rawvalue>
<try> src2\n to: \n - t</try>
<success>\n to: \n - target</success>
<attributes>[[s, r, c, 2]]</attributes>
</rawvalue>
<success>\n to: \n - target</success>
<attributes>[[s, r, c, 2], 2]</attributes>
</value>
<success>\n to: \n - target</success>
<attributes>[[[f, r, o, m], [s, r, c, 2]], 1]</attributes>
</entry>
<entry>
<try>to: \n - target3\n </try>
<key>
<try>to: \n - target3\n </try>
<success>: \n - target3\n </success>
<attributes>[[t, o]]</attributes>
</key>
<value>
<try> \n - target3\n </try>
<list>
<try>- target3\n - targ</try>
<value>
<try>target3\n - target</try>
<list>
<try>target3\n - target</try>
<fail/>
</list>
<dict>
<try>target3\n - target</try>
<entry>
<try>target3\n - target</try>
<key>
<try>target3\n - target</try>
<success>\n - target4</success>
<attributes>[[t, a, r, g, e, t, 3]]</attributes>
</key>
<fail/>
</entry>
<fail/>
</dict>
<rawvalue>
<try>target3\n - target</try>
<success>\n - target4</success>
<attributes>[[t, a, r, g, e, t, 3]]</attributes>
</rawvalue>
<success>\n - target4</success>
<attributes>[[t, a, r, g, e, t, 3], 3]</attributes>
</value>
<value>
<try>target4</try>
<list>
<try>target4</try>
<fail/>
</list>
<dict>
<try>target4</try>
<entry>
<try>target4</try>
<key>
<try>target4</try>
<success></success>
<attributes>[[t, a, r, g, e, t, 4]]</attributes>
</key>
<fail/>
</entry>
<fail/>
</dict>
<rawvalue>
<try>target4</try>
<success></success>
<attributes>[[t, a, r, g, e, t, 4]]</attributes>
</rawvalue>
<success></success>
<attributes>[[t, a, r, g, e, t, 4], 3]</attributes>
</value>
<success></success>
<attributes>[[[t, a, r, g, e, t, 3], [t, a, r, g, e, t, 4]], 2]</attributes>
</list>
<success></success>
<attributes>[[[t, a, r, g, e, t, 3], [t, a, r, g, e, t, 4]], 2]</attributes>
</value>
<success></success>
<attributes>[[[t, o], [[t, a, r, g, e, t, 3], [t, a, r, g, e, t, 4]]], 1]</attributes>
</entry>
<success></success>
<attributes>[[[[f, r, o, m], [s, r, c, 2]], [[t, o], [[t, a, r, g, e, t, 3], [t, a, r, g, e, t, 4]]]], 1]</attributes>
</dict>
<success></success>
<attributes>[[[[f, r, o, m], [s, r, c, 2]], [[t, o], [[t, a, r, g, e, t, 3], [t, a, r, g, e, t, 4]]]], 1]</attributes>
</value>
<success></success>
<attributes>[[[[[f, r, o, m], [s, r, c]], [[t, o], [[t, a, r, g, e, t, 1], [t, a, r, g, e, t, 2]]]], [[[f, r, o, m], [s, r, c, 2]], [[t, o], [[t, a, r, g, e, t, 3], [t, a, r, g, e, t, 4]]]]], 0]</attributes>
</list>
<success></success>
<attributes>[[[[[f, r, o, m], [s, r, c]], [[t, o], [[t, a, r, g, e, t, 1], [t, a, r, g, e, t, 2]]]], [[[f, r, o, m], [s, r, c, 2]], [[t, o], [[t, a, r, g, e, t, 3], [t, a, r, g, e, t, 4]]]]], 0]</attributes>
</value>
<success></success>
<attributes>[[[[[f, r, o, m], [s, r, c]], [[t, o], [[t, a, r, g, e, t, 1], [t, a, r, g, e, t, 2]]]], [[[f, r, o, m], [s, r, c, 2]], [[t, o], [[t, a, r, g, e, t, 3], [t, a, r, g, e, t, 4]]]]]]</attributes>
</start>
Parsed: [{"from": src, "to": [target1, target2]}, {"from": src2, "to": [target3, target4]}]