3

我有一个字符串规则对的映射,我想以某种方式创建它们的“联合规则”(rule_t joint_rule;)。如果我这样做:

joint_rule = convert_logformat["%h"] >> convert_logformat["%t"];

那么用 phrase_parse 配合这个联合规则,可以成功匹配下面的字符串

std::string entry = "127.0.0.1 [16/Aug/2012:01:50:02 +0000]";

但是,如果我以这种方式创建联合规则:

for (it = convert_logformat.begin(); it != convert_logformat.end(); it++)
{
   joint_rule = joint_rule.copy() >> (*it).second.copy();
}

它不匹配相同的字符串。为什么?我怎样才能实现与后者类似的东西?


相关代码:

    // Parses one log entry in [first, last) by chaining the per-field rules that
    // are registered in a directive -> rule map, skipping whitespace between fields.
    //
    // Captured fields are written to the reference parameters by the semantic
    // actions attached to each rule (ip, timestamp, req, status,
    // transferred_bytes, referer, ua).
    //
    // Returns false when the input was not consumed completely; otherwise
    // returns the result of phrase_parse.
    template <typename Iterator>
bool parse_logentry(Iterator first, Iterator last, std::vector<char>& ip, std::vector<char>& timestamp, std::vector<char>& req, unsigned int& status, unsigned int& transferred_bytes, std::vector<char>& referer, std::vector<char>& ua)
{
    using boost::spirit::qi::char_;
    using boost::spirit::qi::int_;
    using boost::spirit::qi::uint_;
    using boost::spirit::qi::phrase_parse;
    using boost::spirit::ascii::space;
    using boost::spirit::ascii::space_type;
    using boost::phoenix::ref;
    using boost::phoenix::push_back;
    using boost::spirit::qi::_1;
    using boost::spirit::qi::lexeme;
    using boost::spirit::qi::rule;

    // Every field rule shares one signature: std::string attribute, space skipper.
    typedef boost::spirit::qi::rule<Iterator, std::string(), space_type> rule_t;
    rule_t ip_rule, timestamp_rule, user_rule, req_rule, ref_rule, ua_rule, bytes_rule, status_rule;
    // One or more digits/dots; the match is stored into `ip`.
    ip_rule %= lexeme[(+char_("0-9."))[ref(ip) = _1]];
    // Text enclosed in [...]; the text between the brackets is stored into `timestamp`.
    timestamp_rule %= lexeme[('[' >> +(~char_(']')) >> ']')[ref(timestamp) = _1]];
    // One or more non-space characters; no semantic action, nothing is stored.
    user_rule %= lexeme[(+~char_(" "))];
    // Double-quoted strings; the quoted text is stored into req / referer / ua.
    req_rule %= lexeme[('"' >> +(~char_('"')) >> '"')[ref(req) = _1]];
    ref_rule %= lexeme[('"' >> +(~char_('"')) >> '"')[ref(referer) = _1]];
    ua_rule %= lexeme[('"' >> +(~char_('"')) >> '"')[ref(ua) = _1]];
    // Unsigned integers stored into transferred_bytes / status.
    bytes_rule %= uint_[ref(transferred_bytes) = _1];
    status_rule %= uint_[ref(status) = _1];
    // Maps a log-format directive to the rule that parses it. Note that std::map
    // iterates in key order, not in insertion order.
    std::map<std::string, rule_t> convert_logformat;
    typename std::map<std::string, rule_t>::iterator it;

    // Only "%h" and "%t" are registered; the remaining directives are disabled.
    convert_logformat.insert(std::pair<std::string, rule_t>("%h", ip_rule));
    convert_logformat.insert(std::pair<std::string, rule_t>("%t", timestamp_rule));
    //convert_logformat.insert(std::pair<std::string, rule_t>("%r", req_rule));
    //convert_logformat.insert(std::pair<std::string, rule_t>("%>s", status_rule));
    //convert_logformat.insert(std::pair<std::string, rule_t>("%b", bytes_rule));
    //convert_logformat.insert(std::pair<std::string, rule_t>("%u", user_rule));
    //convert_logformat.insert(std::pair<std::string, rule_t>("%{User-agent}i", ua_rule));
    //convert_logformat.insert(std::pair<std::string, rule_t>("%{Referer}i", ref_rule));

    // NOTE(review): joint_rule is default-constructed and never given an initial
    // definition, so the first loop iteration below sequences the real rules after
    // an undefined rule. This is the reason the loop-built rule does not match;
    // seeding it with qi::eps before the loop is the fix given in the answer.
    rule_t joint_rule;

    //joint_rule = convert_logformat["%h"] >> convert_logformat["%t"];

    // Append each registered rule to the sequence; copy() embeds the rules by
    // value in the new expression.
    for (it = convert_logformat.begin(); it != convert_logformat.end(); it++)
    {
        joint_rule = joint_rule.copy() >> (*it).second.copy();
        // Debug output: directive key and the rule's type name.
        std::cout << (*it).first << ": " << typeid((*it).second).name() << "\n";
    }

    // Debug output: type name of the rule registered for "%h".
    std::cout << "convert_logformath: " << typeid(convert_logformat["%h"]).name() << "\n";

    bool r = phrase_parse(first, last, joint_rule, space);
    // Leftover input counts as a failure even if the parser itself succeeded.
    if (first != last)
        return false;
    return r;
}
4

1 回答 1

3

咳咳。这真的很简单。你应该初始化你的变量:)

rule_t joint_rule; // what is it initialized to?

for (auto it = convert_logformat.begin(); it != convert_logformat.end(); it++)
{
    joint_rule = joint_rule.copy() >> (*it).second.copy();
}

将第一行更改为

rule_t joint_rule = qi::eps;

它有效:

sehe@mint12:/tmp$ ./test 
127.0.0.1
16/Aug/2012:01:50:02 +0000

您的解析器缺乏一些(好的)常见做法。请参阅下面的整理源代码 (C++11)。

请注意,使用 std::map 来存储规则看起来有些奇怪,因为遍历 map 时是按键的顺序进行的,而不是按插入顺序。

在http://liveworkspace.org/code/a7f2f94840d63fce43d8c3f56236330e查看代码

// #define BOOST_SPIRIT_DEBUG
#include <iostream>
#include <map>
#include <string>
#include <typeinfo>
#include <vector>

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>

namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;

/// Spirit.Qi grammar that parses a log entry as a sequence of per-field rules.
///
/// Each field rule stores what it matched into the corresponding public data
/// member through a Phoenix semantic action, so after a successful parse the
/// caller reads the results from `ip`, `timestamp`, and so on.
///
/// The start rule `joint_rule` is assembled at construction time from a
/// directive -> rule map. Only "%h" and "%t" are currently registered, so the
/// effective grammar is `eps >> ip_rule >> timestamp_rule`.
///
/// @tparam Iterator  iterator type of the input being parsed.
template <typename Iterator>
struct Grammar : qi::grammar<Iterator, std::string(), qi::space_type>
{
    /// Common signature of every rule: std::string attribute, space skipper.
    using rule_t = qi::rule<Iterator, std::string(), qi::space_type>;

    Grammar() : Grammar::base_type(joint_rule)
    {
        using namespace qi;

        // `%=` is used so the rules keep their attribute even though semantic
        // actions are attached; `lexeme` keeps the skipper from running inside
        // a single field.
        ip_rule        %= lexeme[ (+char_("0-9."))[phx::ref(ip)                      =  _1] ];
        timestamp_rule %= lexeme[ ('[' >> +(~char_(']')) >> ']')[phx::ref(timestamp) =  _1] ];
        user_rule      %= lexeme[ (+~char_(" "))                                ];   // no action: nothing is stored
        req_rule       %= lexeme[ ('"' >> +(~char_('"')) >> '"')[phx::ref(req)       =  _1] ];
        ref_rule       %= lexeme[ ('"' >> +(~char_('"')) >> '"')[phx::ref(referer)   =  _1] ];
        ua_rule        %= lexeme[ ('"' >> +(~char_('"')) >> '"')[phx::ref(ua)        =  _1] ];
        bytes_rule     %= uint_[phx::ref(transferred_bytes)                          =  _1  ];
        status_rule    %= uint_[phx::ref(status)                                     =  _1  ];

        // Directive -> rule table. std::map iterates in key order, NOT in
        // insertion order, so the sequence built below follows the sorted keys.
        const std::map<std::string, rule_t> convert_logformat {
            { "%h"            , ip_rule }       ,
            { "%t"            , timestamp_rule },
        //  { "%r"            , req_rule }      ,
        //  { "%>s"           , status_rule }   ,
        //  { "%b"            , bytes_rule }    ,
        //  { "%u"            , user_rule }     ,
        //  { "%{User-agent}i", ua_rule }       ,
        //  { "%{Referer}i"   , ref_rule }
        };

        // Seed the start rule with a parser that always succeeds; composing
        // onto a default-constructed (undefined) rule would never match.
        joint_rule = eps;

        // Append every registered rule; copy() embeds the rules by value so the
        // expression does not refer to the local map after the constructor returns.
        for (auto const& p: convert_logformat)
        {
            joint_rule = joint_rule.copy() >> p.second.copy();
        }

        // Name the rules for Spirit's debug output (enabled via BOOST_SPIRIT_DEBUG).
        BOOST_SPIRIT_DEBUG_NODE(ip_rule);
        BOOST_SPIRIT_DEBUG_NODE(timestamp_rule);
        BOOST_SPIRIT_DEBUG_NODE(user_rule);
        BOOST_SPIRIT_DEBUG_NODE(req_rule);
        BOOST_SPIRIT_DEBUG_NODE(ref_rule);
        BOOST_SPIRIT_DEBUG_NODE(ua_rule);
        BOOST_SPIRIT_DEBUG_NODE(bytes_rule);
        BOOST_SPIRIT_DEBUG_NODE(status_rule);
    }

    rule_t ip_rule, timestamp_rule, user_rule, req_rule, ref_rule, ua_rule, bytes_rule, status_rule;
    rule_t joint_rule;   ///< start rule: sequence of all registered field rules

    // Parse results, filled in by the semantic actions above.
    std::vector<char> ip;
    std::vector<char> timestamp;
    std::vector<char> req;
    // Zero-initialized: the rules that write these two are not part of the
    // active grammar, so without an initializer a caller would read
    // indeterminate values.
    unsigned int status = 0;
    unsigned int transferred_bytes = 0;
    std::vector<char> referer;
    std::vector<char> ua;
};

/// Runs `parser` over [first, last), using qi::space as the skipper.
///
/// @param first   start of the input (taken by value; the caller's iterator is untouched).
/// @param last    end of the input.
/// @param parser  grammar to run; its semantic actions store the captured fields.
/// @return true only when the parse succeeded and the whole input was consumed.
template <typename Iterator>
bool parse_logentry(Iterator first, Iterator last, 
        Grammar<Iterator>& parser)
{
    // A failed parse is a failure regardless of how much input was consumed.
    if (!qi::phrase_parse(first, last, parser, qi::space))
        return false;

    // Trailing unparsed input also counts as a failure.
    return first == last;
}

// Demo driver: parses one sample log entry and prints the captured IP address
// and timestamp on separate lines, or "not ok" when the entry does not match.
int main(void)
{
    std::string entry = "127.0.0.1 [16/Aug/2012:01:50:02 +0000]";
    //std::string entry = "127.0.0.1 [16/Aug/2012:01:50:02 +0000] \"GET /check.htm HTTP/1.1\" 200 17 \"-\" \"AgentName/0.1 libwww-perl/5.833\"";

    Grammar<std::string::iterator> parser;

    const bool matched = parse_logentry(entry.begin(), entry.end(), parser);
    if (!matched)
    {
        std::cout << "not ok\n";
        return 0;
    }

    // The captured fields are character vectors; print each as one string.
    std::cout << std::string(parser.ip.begin(), parser.ip.end()) << "\n";
    std::cout << std::string(parser.timestamp.begin(), parser.timestamp.end()) << "\n";

    return 0;
}

请注意,除其他外,此设置允许您通过在开始时简单地定义 BOOST_SPIRIT_DEBUG 来启用语法调试。

于 2012-09-10T08:40:20.593 回答