c++ - 如何提高 boost::spirit::x3 键值解析器的性能

Question

我正在使用boost::spirit::x3. 将性能与我的手写解析器进行比较时，boost::spirit::x3比这慢了大约 10%。

我正在使用 boost 1.61 和 GCC 6.1：

$ g++ -std=c++14 -O3 -I/tmp/boost_1_61_0/boost/ main.cpp  && ./a.out

phrase_parse 1.97432 microseconds
parseHeader 1.75742 microseconds

如何提高boost::spirit::x3基于解析器的性能？

#include <iostream>
#include <string>
#include <map>
#include <chrono>

#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/adapted/std_pair.hpp>

using header_map = std::map<std::string, std::string>; 

namespace parser
{
    namespace x3 = boost::spirit::x3;
    using x3::char_;
    using x3::lexeme;

    x3::rule<class map, header_map> const map = "msg";

    const auto key     = +char_("0-9a-zA-Z-");
    const auto value   = +~char_("\r\n");

    const auto header =(key >> ':' >> value >> lexeme["\r\n"]);
    const auto map_def = *header >> lexeme["\r\n"];

    BOOST_SPIRIT_DEFINE(map);
}


template <typename It>
void parseHeader(It& iter, It end, header_map& map)
{
    std::string key;
    std::string value;

    It last = iter;
    bool inKey = true;
    while(iter+1 != end)
    {
        if(inKey && *(iter+1)==':')
        {
            key.assign(last, iter+1);
            iter+=3;
            last = iter;
            inKey = false;
        }
        else if (!inKey && *(iter+1)=='\r' && *(iter+2)=='\n')
        {
            value.assign(last, iter+1);
            map.insert({std::move(key), std::move(value)});
            iter+=3;
            last = iter;
            inKey = true;
        }
        else if (inKey && *(iter)=='\r' && *(iter+1)=='\n') 
        {
            iter+=2;
            break;
        }
        else
        {
            ++iter;
        }
    }
}

template<typename F, typename ...Args>
double benchmark(F func, Args&&... args)
{
    auto start = std::chrono::system_clock::now();

    constexpr auto num = 10 * 1000 * 1000;
    for (std::size_t i = 0; i < num; ++i)
    {
        func(std::forward<Args>(args)...);
    }

    auto end = std::chrono::system_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);

    return duration.count() / (double)num;
}

int main()
{
    const std::size_t headerCount = 20;

    std::string str;
    for(std::size_t i = 0; i < headerCount; ++i)
    {
        std::string num = std::to_string(i);
        str.append("key" + num + ": " + "value" + num + "\r\n");
    }
    str.append("\r\n");

    double t1 = benchmark([&str]() {
        auto iter = str.cbegin();
        auto end = str.cend();

        header_map header;
        phrase_parse(iter, end, parser::map, boost::spirit::x3::ascii::blank, header);
        return header;
    });
    std::cout << "phrase_parse " << t1 << " microseconds"<< std::endl;

    double t2 = benchmark([&str]() {
        auto iter = str.cbegin();
        auto end = str.cend();

        header_map header;
        parseHeader(iter, end, header);
        return header;
    });
    std::cout << "parseHeader " << t2 << " microseconds"<< std::endl;
    return 0;
}

活生生的例子

score 2 · Accepted Answer

这是一个固定的 x3 语法，它更接近您的手卷“解析器”：

const auto key     = +~char_(':');
const auto value   = *(char_ - "\r\n");

const auto header = key >> ':' >> value >> "\r\n";
const auto map    = *header >> "\r\n";

当然，它仍然更严格，更健壮。另外，不要用空格键调用它，因为您的手动解析器也不会这样做。

这是我的盒子上的性能测量：

统计数据平均为 2.5µs 与 3.5µs。

完整代码

使用http://nonius.io进行强大的基准测试：

#include <iostream>
#include <string>
#include <map>
#include <nonius/benchmark.h++>

#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/adapted/std_pair.hpp>

using header_map = std::map<std::string, std::string>; 

namespace parser
{
    namespace x3 = boost::spirit::x3;
    using x3::char_;

    const auto key     = +~char_(':');
    const auto value   = *(char_ - "\r\n");

    const auto header = key >> ':' >> value >> "\r\n";
    const auto map    = *header >> "\r\n";
}


template <typename It>
void parseHeader(It& iter, It end, header_map& map)
{
    std::string key;
    std::string value;

    It last = iter;
    bool inKey = true;
    while(iter+1 != end)
    {
        if(inKey && *(iter+1)==':')
        {
            key.assign(last, iter+1);
            iter+=3;
            last = iter;
            inKey = false;
        }
        else if (!inKey && *(iter+1)=='\r' && *(iter+2)=='\n')
        {
            value.assign(last, iter+1);
            map.insert({std::move(key), std::move(value)});
            iter+=3;
            last = iter;
            inKey = true;
        }
        else if (inKey && *(iter)=='\r' && *(iter+1)=='\n') 
        {
            iter+=2;
            break;
        }
        else
        {
            ++iter;
        }
    }
}

static auto const str = [] {
    std::string tmp;
    const std::size_t headerCount = 20;
    for(std::size_t i = 0; i < headerCount; ++i)
    {
        std::string num = std::to_string(i);
        tmp.append("key" + num + ": " + "value" + num + "\r\n");
    }
    tmp.append("\r\n");
    return tmp;
}();

NONIUS_BENCHMARK("manual", [](nonius::chronometer cm) {

    cm.measure([]() {
        auto iter = str.cbegin();
        auto end = str.cend();

        header_map header;
        parseHeader(iter, end, header);
        assert(header.size() == 20);
        return header.size();
    });
})

NONIUS_BENCHMARK("x3", [](nonius::chronometer cm) {

    cm.measure([] {
        auto iter = str.cbegin();
        auto end = str.cend();

        header_map header;
        parse(iter, end, parser::map, header);
        assert(header.size() == 20);
        return header.size();
    });
})

#include <nonius/main.h++>

我正在使用 gcc 5.4 和 Boost 1.61

score 0 · Accepted Answer

在第一次查看自定义解析器之后，我发现它不像精神解析器那么健壮。

如果您更改第 91 行以\r从尾随中删除，"\r\n"您会明白我的意思。

错误的数据会导致手卷数据出现段错误。

例如：

str.append("key" + num + ": " + "value" + num + "\n");

导致第 46 行出现段错误。

所以第一步，我认为，是修改手动解析器，使其以与精神解析器相同的方式检查边界条件。

虽然我不希望时间完全收敛，但它们会更接近。

c++ - 如何提高 boost::spirit::x3 键值解析器的性能

2 回答 2

完整代码

Related

Reference