c++ - 如何泛化 Boost.Spirit 解析器，使其能以任意顺序接收列表？

Question

我有一个简单的解析器，可以解析整数列表或带引号的字符串。

在定义了 SIMPLE_CASE 的情况下，我使用如下输入：

std::string input1 = "{ INT: 42, 24 STR: \"Smith\", \"John\" }";

它正确解析为my_record，其中包含一个整数列表和一个 std::string 列表。

我想把这段代码改得更通用，使它可以接受以任意顺序出现的零个或多个 INT 列表和零个或多个 STR 列表，并把其中的值按出现顺序填入 my_record。我希望第二个、更通用的测试用例：

std::string input1 = "{ STR: \"Joe\" INT: 42, 24 STR: \"Smith\", \"John\" }";

解析为：

client::my_record expected1 { { 42, 24 }, {"Joe", "Smith", "John"} };

如果我运行，下面的代码可以正常工作：

/tmp$ g++ -DSIMPLE_CASE -g -std=c++11 sandbox.cpp -o sandbox && ./sandbox

但我不确定，按下面的方式（不定义 SIMPLE_CASE）编译运行时，如何让一般情况也能工作：

/tmp$ g++ -g -std=c++11 sandbox.cpp -o sandbox && ./sandbox

sandbox.cpp 的代码

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>

#include <string>
#include <complex>
#include <algorithm>

namespace client
{
    namespace qi    = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;

    /// Holds the integers and the strings produced by the parser.
    struct my_record
    {
        std::vector<int>          m_ints;  ///< parsed integer values
        std::vector<std::string>  m_strs;  ///< parsed quoted strings

        /// Two records are equal when both member vectors are equal.
        ///
        /// std::vector's operator== compares sizes before elements. The
        /// three-iterator form of std::equal does not: it treats a vector that
        /// is only a prefix of the other as equal, and reads past the end of
        /// `other` when `other` is the shorter one.
        bool operator==( const my_record& other ) const
        {
            return m_ints == other.m_ints
                && m_strs == other.m_strs;
        }

        /// Negation of operator==.
        bool operator!=( const my_record& other ) const
        {
            return ! operator==( other );
        }

        friend std::ostream& operator<<( std::ostream& os, const my_record& rec );
    };

    /// Streams a record as two lines: first the integers, then the strings,
    /// each value followed by a single space.
    ///
    /// The text goes to the stream that was passed in, and that stream is
    /// returned so insertions can be chained. Flowing off the end of a
    /// function with a non-void return type is undefined behaviour.
    ///
    /// @param os   destination stream
    /// @param rec  record to print
    /// @return     `os`
    std::ostream& operator<<( std::ostream& os, const my_record& rec )
    {
        for( const auto& x : rec.m_ints )
            os << x << ' ';
        os << '\n';  // '\n' rather than std::endl: flushing is left to the caller

        for( const auto& x : rec.m_strs )
            os << x << ' ';
        os << '\n';

        return os;
    }
}

// Adapt client::my_record as a Boost.Fusion sequence whose elements are
// m_ints and m_strs, in that order. A Spirit rule whose attribute is
// my_record() can then fill the members from a two-element parser sequence.
BOOST_FUSION_ADAPT_STRUCT(
    client::my_record,
        (std::vector<int>,          m_ints)
        (std::vector<std::string>,  m_strs)
)

namespace client
{
    /// Qi grammar for input of the form `{ INT: 1, 2 STR: "a", "b" }`.
    ///
    /// The synthesized attribute is a my_record and whitespace is skipped with
    /// ascii::space. With SIMPLE_CASE defined, exactly one INT list followed by
    /// exactly one STR list is accepted. Without it, the start rule is the
    /// attempt at a general form that this question is about.
    template <typename Iterator>
    struct employee_parser : qi::grammar<Iterator, my_record(), ascii::space_type>
    {
        employee_parser() : employee_parser::base_type(start)
    {
        using qi::int_;
        using qi::lit;
        using qi::double_;
        using qi::lexeme;
        using ascii::char_;

        // A double quote, one or more non-quote characters, a closing quote.
        // lexeme[] turns the skipper off so blanks inside the quotes are kept.
        quoted_string %= lexeme['"' >> +(char_ - '"') >> '"'];

#ifdef SIMPLE_CASE
        // Fixed layout: one INT list, then one STR list.
        start %=
            '{'
            >>  int_list
            >>  str_list
            >>  '}'
            ;
#else
        // not sure how to approach this
        // As a sequence, this accepts every INT list before any STR list, so
        // interleaved input such as STR, INT, STR is not matched.
        start %=
            '{'
            >>  *(int_list)  // want zero or more of these, in any order
            >>  *(str_list)  // want zero or more of these, in any order
            >>  '}'
            ;
#endif

        // "STR:" followed by a comma-separated list of quoted strings.
        str_list %=
                lit( "STR:" ) >> quoted_string % ','    
                ;

        // "INT:" followed by a comma-separated list of integers.
        int_list %=
                lit( "INT:" ) >> int_ % ','
                ;
    }

    qi::rule<Iterator, std::string(), ascii::space_type>               quoted_string;
    qi::rule<Iterator, std::vector<std::string>(), ascii::space_type>  str_list;
    qi::rule<Iterator, std::vector<int>(),         ascii::space_type>  int_list;

    // Entry rule handed to the grammar base class in the constructor.
    qi::rule<Iterator, my_record(), ascii::space_type>                 start;
    };
}

/// Parses `input` with employee_parser and compares the result to `expected`.
///
/// @param input     text to parse
/// @param expected  record the parse is expected to produce
/// @return 0 when the parse succeeded, consumed the whole input and produced
///         `expected`; -1 otherwise, after writing a diagnostic to std::cerr.
static int
TryParse( const std::string& input, const client::my_record& expected )
{
    using boost::spirit::ascii::space;

    client::my_record                        rec;
    auto                                     iter = input.begin();
    auto                                     end  = input.end();
    client::employee_parser<decltype(iter)>  g;

    // phrase_parse reports a failed match through its return value. Testing
    // only iter != end would miss a failure on input where iter already
    // equals end (for example an empty string).
    const bool matched = phrase_parse( iter, end, g, space, rec );
    if ( !matched || iter != end )
    {
        std::cerr << "failed to parse completely" << std::endl;
        return -1;
    }

    if ( rec != expected )
    {
        std::cerr << "unexpected result in parse" << std::endl;
        std::cerr << rec;
        return -1;
    }
    return 0;
}

int 
main(int argc, char* argv[])
{
#ifdef SIMPLE_CASE
    client::my_record  expected1 { { 42, 24 }, {"Smith", "John"} }, emp;
    std::string        input1 = "{ INT: 42, 24 STR: \"Smith\", \"John\" }";
    return TryParse( input1, expected1 );
#else
    client::my_record  expected1 { { 42, 24 }, {"Joe", "Smith", "John"} }, emp;
    std::string        input1 = "{ STR: \"Joe\" INT: 42, 24 STR: \"Smith\", \"John\" }";
    return TryParse( input1, expected1 );
#endif

}

score 4 · Accepted Answer

另一种不依赖语义动作的做法：使用 is_container 和 push_back_container 这两个定制点：

第一步：删除你的 BOOST_FUSION_ADAPT_STRUCT 宏。

第二步：改变你的start规则。

// Kleene star over an alternative: any number of INT or STR lists, in any
// order, between the braces.
start %=
            '{'
            >>  *(int_list // either an INT list ...
                | str_list)  // ... or a STR list, repeated zero or more times
            >>  '}'
            ;

第三步：添加以下特化。

namespace boost { namespace spirit { namespace traits
{
    // Make Spirit treat my_record as a container attribute.
    template <>
    struct is_container<client::my_record>
        : mpl::true_
    {
    };

    // The "elements" of that container are whole parsed lists: either a
    // vector<int> or a vector<string>.
    template <>
    struct container_value<client::my_record>
    {
        using type = boost::variant<std::vector<int>, std::vector<std::string>>;
    };

    // Adding a list of ints: append it to the integers accumulated so far.
    template <>
    struct push_back_container<client::my_record, std::vector<int>>
    {
        static bool call(client::my_record& c, std::vector<int> const& val)
        {
            std::vector<int>& target = c.m_ints;
            target.insert(target.end(), val.begin(), val.end());
            return true;
        }
    };

    // Adding a list of strings: append it to the strings accumulated so far.
    template <>
    struct push_back_container<client::my_record, std::vector<std::string>>
    {
        static bool call(client::my_record& c, std::vector<std::string> const& val)
        {
            std::vector<std::string>& target = c.m_strs;
            target.insert(target.end(), val.begin(), val.end());
            return true;
        }
    };

}}}

下面是应要求给出的完整代码（如果改用多次 push_back 来构造预期结果，可以用 g++ 4.7.1 和 msvc11 编译）：

更新了示例：增加了另一个成员向量，其元素是经过 Fusion 适配的结构体。

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/struct.hpp>


#include <string>
#include <vector>
#include <iostream>

namespace client
{
    /// A pair of doubles with member-wise equality.
    struct my_subrec
    {
        double foo;
        double bar;

        /// True when both `foo` and `bar` compare equal.
        bool operator==( const my_subrec& other ) const
        {
            if ( !(foo == other.foo) )
                return false;
            return bar == other.bar;
        }
    };

    /// Writes the record as `foo->bar` and returns the stream.
    std::ostream& operator<<( std::ostream& os, const my_subrec& rec )
    {
        return os << rec.foo << "->" << rec.bar;
    }

}

// Adapt client::my_subrec as a Boost.Fusion sequence (foo, bar) so that a
// two-element parser sequence can fill it directly.
BOOST_FUSION_ADAPT_STRUCT(client::my_subrec,
                (double, foo)
                (double, bar)
                )


namespace client
{
    namespace qi    = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;


    /// Holds the integers, strings and sub-records produced by the parser.
    struct my_record
    {
        std::vector<int>          m_ints;  ///< parsed integer values
        std::vector<std::string>  m_strs;  ///< parsed quoted strings
        std::vector<my_subrec>    m_recs;  ///< parsed sub-records

        /// Two records are equal when all three member vectors are equal.
        ///
        /// std::vector's operator== compares sizes before elements. The
        /// three-iterator form of std::equal does not: it treats a vector that
        /// is only a prefix of the other as equal, and reads past the end of
        /// `other` when `other` is the shorter one.
        bool operator==( const my_record& other ) const
        {
            return m_ints == other.m_ints
                && m_strs == other.m_strs
                && m_recs == other.m_recs;
        }

        /// Negation of operator==.
        bool operator!=( const my_record& other ) const
        {
            return ! operator==( other );
        }

        friend std::ostream& operator<<( std::ostream& os, const my_record& rec );
    };

    /// Streams the integers on one line, the strings on the next and the
    /// sub-records on a third (unterminated) line. Every value is followed by
    /// a single space. Returns `os`.
    std::ostream& operator<<( std::ostream& os, const my_record& rec )
    {
        for( auto it = rec.m_ints.begin(); it != rec.m_ints.end(); ++it )
            os << *it << ' ';
        os << '\n';

        for( auto it = rec.m_strs.begin(); it != rec.m_strs.end(); ++it )
            os << *it << ' ';
        os << '\n';

        for( auto it = rec.m_recs.begin(); it != rec.m_recs.end(); ++it )
            os << *it << ' ';

        return os;
    }
}

//BOOST_FUSION_ADAPT_STRUCT(
//    client::my_record,
//        (std::vector<int>,          m_ints)
//        (std::vector<std::string>,  m_strs)
//)


namespace client
{
    /// Qi grammar for input such as
    /// `{ STR: "Joe" INT: 42, 24 REC: 1.5-2.5 }`.
    ///
    /// The synthesized attribute is a my_record and whitespace is skipped with
    /// ascii::space. Without SIMPLE_CASE, any number of INT, STR and REC lists
    /// may appear between the braces, in any order.
    template <typename Iterator>
    struct employee_parser : qi::grammar<Iterator, my_record(), ascii::space_type>
    {
        employee_parser() : employee_parser::base_type(start)
    {
        using qi::int_;
        using qi::lit;
        using qi::double_;
        using qi::lexeme;
        using ascii::char_;

        // A double quote, one or more non-quote characters, a closing quote.
        // lexeme[] turns the skipper off so blanks inside the quotes are kept.
        quoted_string %= lexeme['"' >> +(char_ - '"') >> '"'];

#ifdef SIMPLE_CASE
        // Fixed layout: one INT list, then one STR list.
        start %=
            '{'
            >>  int_list
            >>  str_list
            >>  '}'
            ;
#else
        // General layout: zero or more lists of any of the three kinds. Each
        // alternative synthesizes one vector.
        start %=
            '{'
            >>  *(int_list // want zero or more of these, in any order
                | str_list  // want zero or more of these, in any order
                | rec_list)
            >>  '}'
            ;
#endif

        // "STR:" followed by a comma-separated list of quoted strings.
        str_list %=
                lit( "STR:" ) >> quoted_string % ','    
                ;

        // "INT:" followed by a comma-separated list of integers.
        int_list %=
                lit( "INT:" ) >> int_ % ','
                ;
        // "REC:" followed by a comma-separated list of sub-records.
        rec_list =
                lit( "REC:" ) >> rec % ','
                ;
        // One sub-record: two doubles separated by '-'.
        rec = double_ >> lit('-') >> double_
                ;
    }

    qi::rule<Iterator, std::string(), ascii::space_type>               quoted_string;
    qi::rule<Iterator, std::vector<std::string>(), ascii::space_type>  str_list;
    qi::rule<Iterator, std::vector<int>(),         ascii::space_type>  int_list;
    qi::rule<Iterator, client::my_subrec(), ascii::space_type> rec;
    qi::rule<Iterator, std::vector<client::my_subrec>(),ascii::space_type> rec_list;

    // Entry rule handed to the grammar base class in the constructor.
    qi::rule<Iterator, my_record(), ascii::space_type>                 start;
    };
}

namespace boost { namespace spirit { namespace traits
{
    // Make Spirit treat my_record as a container attribute.
    template <>
    struct is_container<client::my_record>
        : mpl::true_
    {
    };

    // The "elements" of that container are whole parsed lists: a vector<int>,
    // a vector<string> or a vector<my_subrec>.
    template <>
    struct container_value<client::my_record>
    {
        using type = boost::variant<
            std::vector<int>,
            std::vector<std::string>,
            std::vector<client::my_subrec> >;
    };

    // Adding a list of ints: append it to the integers accumulated so far.
    template <>
    struct push_back_container<client::my_record, std::vector<int>>
    {
        static bool call(client::my_record& c, std::vector<int> const& val)
        {
            std::vector<int>& target = c.m_ints;
            target.insert(target.end(), val.begin(), val.end());
            return true;
        }
    };

    // Adding a list of strings: append it to the strings accumulated so far.
    template <>
    struct push_back_container<client::my_record, std::vector<std::string>>
    {
        static bool call(client::my_record& c, std::vector<std::string> const& val)
        {
            std::vector<std::string>& target = c.m_strs;
            target.insert(target.end(), val.begin(), val.end());
            return true;
        }
    };

    // Adding a list of sub-records: append it to the sub-records accumulated so far.
    template <>
    struct push_back_container<client::my_record, std::vector<client::my_subrec>>
    {
        static bool call(client::my_record& c, std::vector<client::my_subrec> const& val)
        {
            std::vector<client::my_subrec>& target = c.m_recs;
            target.insert(target.end(), val.begin(), val.end());
            return true;
        }
    };

}}}

/// Parses `input` with employee_parser and compares the result to `expected`.
/// On success the parsed record is printed to std::cout.
///
/// @param input     text to parse
/// @param expected  record the parse is expected to produce
/// @return 0 when the parse succeeded, consumed the whole input and produced
///         `expected`; -1 otherwise, after writing a diagnostic to std::cerr.
static int
TryParse( const std::string& input, const client::my_record& expected )
{
    using boost::spirit::ascii::space;

    client::my_record                        rec;
    auto                                     iter = input.begin();
    auto                                     end  = input.end();
    client::employee_parser<decltype(iter)>  g;

    // phrase_parse reports a failed match through its return value. Testing
    // only iter != end would miss a failure on input where iter already
    // equals end (for example an empty string).
    const bool matched = phrase_parse( iter, end, g, space, rec );
    if ( !matched || iter != end )
    {
        std::cerr << "failed to parse completely" << std::endl;
        return -1;
    }

    if ( rec != expected )
    {
        std::cerr << "unexpected result in parse" << std::endl;
        std::cerr << rec;
        return -1;
    }

    std::cout << rec << std::endl;
    return 0;
}

int 
main(int argc, char* argv[])
{
#ifdef SIMPLE_CASE
    client::my_record  expected1 { {42, 24 }, {"Smith", "John"} }, emp;
    std::string        input1 = "{ INT: 42, 24 STR: \"Smith\", \"John\" }";
    return TryParse( input1, expected1 );
#else
    client::my_record  expected1 { { 42, 24,240 }, {"Joe", "Smith", "John"}, {{1.5,2.5}} }, emp;

    std::string        input1 = "{ STR: \"Joe\" INT: 42, 24 STR: \"Smith\", \"John\" INT: 240 REC: 1.5-2.5 }";
    return TryParse( input1, expected1 );
#endif

}

score 4 · Accepted Answer

你的语法规则写错了：

    // Rule as posted in the question. Being a sequence, it requires every INT
    // list to come before any STR list.
    start %=
        '{'
        >>  *(int_list)  // want zero or more of these, in any order
        >>  *(str_list)  // want zero or more of these, in any order
        >>  '}'
        ;

这表示：接受任意数量的 INT 列表，后面再跟任意数量的 STR 列表。你无法匹配 INT、STR、INT 这样的顺序，也无法匹配其他交错的组合。

你需要类似的东西

    // Repeat an alternative instead of sequencing two repetitions: each
    // iteration matches either list kind, so the lists may be interleaved.
    start %=
        '{'
         >> *( int_list  // want zero or more of these, in any order
             | str_list  // want zero or more of these, in any order
             )
        >>  
        '}'
        ;

但显然你还需要让它与你的数据结构配合起来；注意，你可能不得不使用语义动作。

另外：

既然说到这里，下面这段代码我不能视而不见：

    // Quoted from the question as the example of what NOT to do:
    //  - every insertion goes to std::cerr, so the `os` argument is ignored;
    //  - there is no `return os;`, so control flows off the end of a function
    //    with a non-void return type.
    std::ostream& operator<<( std::ostream& os, const my_record& rec )
    {
        for( const auto& x : rec.m_ints )
            std::cerr << x << ' ';   // should write to os
        std::cerr << std::endl;      // should write to os

        for( const auto& x : rec.m_strs )
            std::cerr << x << ' ';   // should write to os
        std::cerr << std::endl;      // should write to os

    }

应该使用 os，像这样：

        // Write to the stream that was passed in, and end the line with '\n'.
        for( const auto& x : rec.m_ints )
            os << x << ' ';
        os << '\n';

另外，尽量避免在流插入运算符中使用 endl；如果需要换行，请使用 \n。

解决方案：

最终需要用到的是 Phoenix 的函数：push_back 和 bind（绑定器）。

/// Qi grammar that fills a my_record through semantic actions.
///
/// Each parsed int is pushed onto the record's first member and each parsed
/// string onto m_strs, so INT and STR lists may appear in any order.
/// NOTE(review): `phx` is not declared in this snippet; presumably it is an
/// alias for boost::phoenix in the full listing. Confirm there.
template<typename Iterator>
struct my_grammar 
: qi::grammar<Iterator, my_record(), ascii::space_type> {

    my_grammar() 
    : my_grammar::base_type(start) {

        // A double quote, one or more non-quote characters, a closing quote.
        quoted_string %= qi::lexeme['"' >> +(qi::char_ - '"') >> '"'];

        // Plain `=` (not `%=`): the attribute is built by the semantic
        // actions below.
        start = qi::lit("{")
                >>
                // Action on every int: append to element 0 of the record.
                // NOTE(review): at_c<0> presumably relies on my_record still
                // being Fusion-adapted. Confirm against the full listing.
                *( "INT:" >> qi::int_     
                    [ 
                        phx::push_back(
                            phx::at_c<0>(
                                qi::_val
                            ), 
                            qi::_1
                        ) 
                    ] % ","
                 // Action on every string: append to m_strs, reached through
                 // a member pointer instead of a Fusion index.
                 | "STR:" >> quoted_string
                     [ 
                        phx::push_back(
                            phx::bind(
                                &my_record::m_strs,
                                qi::_val
                            ), 
                            qi::_1
                        ) 
                    ] % ","
                 )
                >> 
                "}"
                 ;
    }
    qi::rule<Iterator, std::string(), ascii::space_type> quoted_string;
    qi::rule<Iterator, my_record(),   ascii::space_type>   start;
};

整个代码清单可以在这里看到：

http://ideone.com/XW18Z2

c++ - 如何泛化 Boost.Spirit 解析器，使其能以任意顺序接收列表？

2 回答 2

还：

解决方案：

Related

Reference