5

我正在尝试用 nom 构建一个解析器,用来检查 ID 为 UUID 的 URL:

rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912

我创建了以下内容:

extern crate uuid;
use uuid::Uuid;

// Parses the literal "rooms", optionally followed by "/" and a 36-character
// UUID, and yields the UUID when that optional part parses.
// NOTE(review): `FromStr::from_str` needs `use std::str::FromStr;`, which this
// snippet does not show.
named!(room_uuid<&str, Option<Uuid>>,
    do_parse!(
        tag_s!("rooms") >>
        // The whole "/<uuid>" suffix is wrapped in `opt!`, so a failure of the
        // inner parser (including an invalid UUID) is reported as `None`
        // instead of an error -- this is the behavior the question is about.
        id: opt!(complete!(preceded!(
            tag_s!("/"),
            map_res!(take_s!(36), FromStr::from_str)
        ))) >>

        (id)
    )
);

它可以很好地处理几乎所有情况:

// Bare prefix: all input is consumed and no UUID is returned.
assert_eq!(room_uuid("rooms"), Done("", None));
// Trailing slash without an ID: the "/" is left unconsumed.
assert_eq!(room_uuid("rooms/"), Done("/", None));
// A valid UUID after the slash is parsed and returned.
assert_eq!(room_uuid("rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912"), Done("", Some(Uuid::parse_str("e19c94cf-53eb-4048-9c94-7ae74ff6d912").unwrap())));

除了 ID 不是有效 UUID 的情况:

// Desired behavior: an ID that is not a valid UUID should be a parse error.
// With the parser above this assertion does not hold (the result is Ok(None)).
assert!(room_uuid("rooms/123").is_err()); # it fails
# room_uuid("rooms/123").to_result() => Ok(None)

据我了解,这是因为 opt! 会把内部解析器返回的 Err 转换为 None。

我想将 ID 作为可选部分,但如果它存在,它应该是一个有效的 UUID。
不幸的是,我不明白如何将这两件事结合起来:可选性和严格格式。

4

3 回答 3

2

我自己也是最近几周才开始使用 nom,不过我找到了解决这个问题的一种方法。它没有完全用宏来实现,但除了一处改动之外,行为是正确的:当没有给出 UUID 时,我会消耗掉结尾的 / ,而不是把它留在未解析的输入里。

#[macro_use]
extern crate nom;
extern crate uuid;

use std::str::FromStr;
use nom::IResult;
use uuid::Uuid;

fn room_uuid(input: &str) -> IResult<&str, Option<Uuid>> {
    // Check that it starts with "rooms"
    let res = tag_s!(input, "rooms");
    let remaining = match res {
        IResult::Incomplete(i) => return IResult::Incomplete(i),
        IResult::Error(e) => return IResult::Error(e),
        IResult::Done(i, _) => i
    };

    // If a slash is not present, return early
    let optional_slash = opt!(remaining, tag_s!("/"));
    let remaining = match optional_slash {
        IResult::Error(_) |
        IResult::Incomplete(_) => return IResult::Done(remaining, None),
        IResult::Done(i, _) => i
    };

    // If something follows a slash, make sure
    // it's a valid UUID
    if remaining.len() > 0 {
        let res = complete!(remaining, map_res!(take_s!(36), FromStr::from_str));
        match res {
            IResult::Done(i, o) => IResult::Done(i, Some(o)),
            IResult::Error(e) => IResult::Error(e),
            IResult::Incomplete(n) => IResult::Incomplete(n)
        }
    } else {
        // This branch allows for "rooms/"
        IResult::Done(remaining, None)
    }
}

#[test]
fn match_room_plus_uuid() {
    use nom::IResult::*;

    let expected_id = Uuid::parse_str("e19c94cf-53eb-4048-9c94-7ae74ff6d912").unwrap();

    // The bare prefix, with or without a trailing slash, yields no ID and
    // consumes the whole input.
    assert_eq!(room_uuid("rooms"), Done("", None));
    assert_eq!(room_uuid("rooms/"), Done("", None));

    // A well-formed UUID after the slash is returned.
    assert_eq!(
        room_uuid("rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912"),
        Done("", Some(expected_id))
    );

    // Anything else after the slash is rejected.
    assert!(room_uuid("rooms/123").is_err());
}
于 2018-02-14T15:08:46.137 回答
2

鉴于解析 URL 并不需要流式接口,您可以改用 synom。它曾作为 syn crate 的一部分被维护,但遗憾的是现在已不再维护(它被合并进了 syn,并改为只处理 Rust 的 token)。

遗憾的是,synom 没有提供 take_s! 和 eof!(后者用于禁止结尾残留 "未解析" 的 123),不过自己实现它们很容易。

使用 eof! 也意味着您无法返回一个未解析的 "/"(不过我认为这是件好事);另外,嵌套的 option! 需要在最后做一次解包(您也可以改为返回 Option<Option<Uuid>>,以便检测结尾的 "/")。

Playground

#[macro_use]
extern crate synom;

extern crate uuid;
use uuid::Uuid;

/// Takes exactly `$length` characters (not bytes) from the front of the
/// string slice `$i`.
///
/// Evaluates to `synom::IResult::Done(rest, taken)` when the input holds at
/// least `$length` characters, and to `synom::IResult::Error` otherwise.
/// A length of zero always succeeds with an empty `taken`.
///
/// Each macro argument is evaluated exactly once.
macro_rules! take_s {
    ($i:expr, $length:expr) => {{
        let length: usize = $length;
        // Bind the input once so the argument expression is not re-evaluated.
        let input: &str = $i;
        // Candidate split points are the byte offsets where each character
        // starts, followed by the end of the input; the `length`-th candidate
        // is the offset just past the first `length` characters.
        let boundary = input
            .char_indices()
            .map(|(pos, _)| pos)
            .chain(Some(input.len()))
            .nth(length);
        match boundary {
            Some(pos) => {
                let (value, rem) = input.split_at(pos);
                synom::IResult::Done(rem, value)
            }
            // Fewer than `length` characters available.
            None => synom::IResult::Error,
        }
    }};
}

/// Succeeds only at the end of input.
///
/// Evaluates to `synom::IResult::Done($i, ())` when `$i` is empty and to
/// `synom::IResult::Error` when any input is left. The pattern requires the
/// trailing comma after the input expression.
macro_rules! eof {
    ($i:expr,) => {{
        if !$i.is_empty() {
            // Unparsed input remains.
            synom::IResult::Error
        } else {
            synom::IResult::Done($i, ())
        }
    }};
}

// Parses "rooms", optionally followed by "/" and an optional 36-character
// UUID, and requires that no input is left over afterwards.
named!(room_uuid -> Option<Uuid>,
    do_parse!(
        tag!("rooms") >>
        // Outer `option!`: the whole "/..." suffix may be absent.
        id: option!(preceded!(
            tag!("/"),
            // Inner `option!`: the slash may be followed by no ID at all.
            option!(
                // Only a successful `str::parse` result is accepted; the
                // `switch!` has no arm for `Err`.
                switch!(map!(take_s!(36), str::parse),
                    Ok(v) => value!(v)
                )
            )
        )) >>
        // Reject any trailing, unparsed input.
        eof!() >>

        // Flatten the nested `Option<Option<Uuid>>` produced above.
        (id.unwrap_or(None))
    )
);

/// Checks `room_uuid` against the four cases from the question.
fn main() {
    use synom::IResult::*;

    let expected_id = Uuid::parse_str("e19c94cf-53eb-4048-9c94-7ae74ff6d912").unwrap();

    // No ID, with and without a trailing slash.
    assert_eq!(room_uuid("rooms"), Done("", None));
    assert_eq!(room_uuid("rooms/"), Done("", None));

    // Valid UUID.
    assert_eq!(
        room_uuid("rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912"),
        Done("", Some(expected_id))
    );

    // Invalid ID is an error rather than `None`.
    assert_eq!(room_uuid("rooms/123"), Error);
}
于 2018-02-13T23:13:35.477 回答
1

好的,我已经让它在 nom 以及扩展后的 URL 格式 api/v1/rooms/UUID/tracks/UUID 下正常工作了。

基本思路和之前一样:需要检查 eof,忽略结尾的 "/",并且永远不要等待不完整(Incomplete)的结果(alt_complete! 在这里很好用)。

关于您希望得到 ErrorKind::Verify 这一点:我认为错误的具体类型其实并不重要,直接忽略它,或者手动把它映射成您想要的任何错误即可。

请注意 alt_complete! 各分支的顺序:如果分支之间有重叠,应把优先的选项(通常是“较长”的那个)放在前面。

我喜欢自己写的 with! 辅助宏,不过您也可以把它内联展开。

Playground 不支持nom,所以这次没有链接。

#[macro_use]
extern crate nom;

extern crate uuid;
use uuid::Uuid;

// Parses one "/<uuid>" path segment: a slash followed by exactly 36
// characters that must parse as a `Uuid`.
named!(uuid<&str, Uuid>, preceded!(
    tag_s!("/"),
    map_res!(take_s!(36), str::parse)
));

/// A request recognized by the `/api/v1` URL parser.
#[derive(Clone, PartialEq, Eq, Debug)]
enum ApiRequest {
    /// `/rooms`
    Rooms,
    /// `/rooms/UUID`
    Room { room: Uuid },
    /// `/rooms/UUID/tracks`
    Tracks { room: Uuid },
    /// `/rooms/UUID/tracks/UUID`
    Track { room: Uuid, track: Uuid },
}

/// Shortcut for: `do_parse!(name: expr >> r: otherexpr >> (r))`
///
/// Binds the result of the first parser to `name` and evaluates to the result
/// of `otherexpr`. `otherexpr` should use `name`, otherwise you could just use
/// `preceded!`.
///
/// The first parser may be either a macro invocation (`name: submac!(...)`)
/// or a plain identifier naming a parser (`name: parser`).
///
/// The expansions deliberately carry no trailing semicolon: the macro is used
/// in expression position, where a trailing semicolon in the expansion
/// triggers the `semicolon_in_expressions_from_macros` lint.
macro_rules! with {
    ($i:expr, $var:ident: $submac:ident!( $($args:tt)* ) >> $($rest:tt)*) => {
        do_parse!($i, $var: $submac!($($args)*) >> r: $($rest)* >> (r))
    };
    ($i:expr, $var:ident: $submac:ident >> $($rest:tt)*) => {
        do_parse!($i, $var: $submac >> r: $($rest)* >> (r))
    };
}

// /api/v1/rooms/UUID/tracks/UUID
//
// Parses everything after the "/api/v1" prefix into an `ApiRequest` using
// nested alternatives. Inside each `alt_complete!` the more specific
// (longer) branch comes first and a `value!` fallback comes last, so the
// fallback is only used when nothing more specific matched.
named!(apiv1<&str, ApiRequest>, preceded!(tag_s!("/api/v1"),
    alt_complete!(
        preceded!(tag_s!("/rooms"), alt_complete!(
            // "/rooms/UUID..." -- `room` is bound for the nested branches.
            with!(room: uuid >> alt_complete!(
                preceded!(tag_s!("/tracks"), alt_complete!(
                    // "/rooms/UUID/tracks/UUID..." -- `track` is bound as well.
                    with!(track: uuid >> alt_complete!(
                        // ... sub track requests?
                        value!(ApiRequest::Track{room, track})
                    ))
                    |
                    value!(ApiRequest::Tracks{room})
                ))
                // other room requests
                |
                value!(ApiRequest::Room{room})
            ))
            |
            value!(ApiRequest::Rooms)
        ))
        // | ... other requests
    )
));

// Top-level entry point: parses one API URL and requires that the whole
// input is consumed, apart from an optional trailing "/".
named!(api<&str, ApiRequest>, terminated!(
    alt_complete!(
        apiv1
        // | ... other versions
        // also could wrap in new enum like:
        //     apiv1 => { ApiRequest::V1 }
        //     |
        //     apiv2 => { ApiRequest::V2 }
    ),
    tuple!(
        alt_complete!(tag_s!("/") | value!("")), // ignore trailing "/"
        eof!() // make sure full URL was parsed
    )
));

/// Exercises `api` with accepted URLs (with and without a trailing "/") and
/// with rejected URLs, checking the reported error kind for the latter.
fn main() {
    use nom::IResult::*;
    use nom::ErrorKind;

    let room = Uuid::parse_str("e19c94cf-53eb-4048-9c94-7ae74ff6d912").unwrap();
    let track = Uuid::parse_str("83d235e8-03cd-420d-a8c6-6e42440a5573").unwrap();

    // Room collection.
    assert_eq!(api("/api/v1/rooms"), Done("", ApiRequest::Rooms));
    assert_eq!(api("/api/v1/rooms/"), Done("", ApiRequest::Rooms));
    // Single room.
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912"),
        Done("", ApiRequest::Room { room })
    );
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/"),
        Done("", ApiRequest::Room { room })
    );
    // Track collection of a room.
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks"),
        Done("", ApiRequest::Tracks { room })
    );
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks/"),
        Done("", ApiRequest::Tracks { room })
    );
    // Single track of a room.
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks/83d235e8-03cd-420d-a8c6-6e42440a5573"),
        Done("", ApiRequest::Track{room, track})
    );
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks/83d235e8-03cd-420d-a8c6-6e42440a5573/"),
        Done("", ApiRequest::Track{room, track})
    );
    // No known resource after the version prefix: no alternative matches.
    assert_eq!(api("/api/v1"), Error(ErrorKind::Alt));
    assert_eq!(api("/api/v1/foo"), Error(ErrorKind::Alt));
    // A prefix matches but unparsed input remains: the end-of-input check fails.
    assert_eq!(api("/api/v1/rooms/123"), Error(ErrorKind::Eof));
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/bar"),
        Error(ErrorKind::Eof)
    );
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks/83d235e8-03cd-420d-a8c6-6e42440a5573/123"),
        Error(ErrorKind::Eof)
    );
    // Unknown API version.
    assert_eq!(api("/api/v2"), Error(ErrorKind::Alt));
}

您还可以使用更严格的 alt_full_opt_slash! 分支方式,它能确保某个分支只有在完整解析了全部输入时才算匹配。

这样一来,您就可以用更“扁平”的方式来解析各个备选分支(嵌套分支应该仍然可用);不过这意味着某些 UUID 最终可能会被解析不止一次,而且现在所有错误都会是 Alt 类型:

/// Similar to alt_complete, but also requires the branch parses until
/// the end of the input (but ignores a trailing "/").
///
/// Implemented as a token muncher: the public entry arm (last) appends a
/// `| __end` sentinel and starts with an empty accumulator; the `__impl` arms
/// peel off one `|`-separated branch at a time; `__impl_push` wraps that
/// branch; `__impl_push2` appends it to the accumulator; and the `__end` arm
/// finally hands the accumulated branches to `alt_complete!`.
///
/// The internal arms are listed before the public entry arm, and the arm
/// order should be kept as is.
macro_rules! alt_full_opt_slash {
    // Append the first wrapped branch: the accumulator holds only the input,
    // so no `|` separator is needed.
    (__impl_push2 ($i:expr,) ($($new:tt)*), $($rest:tt)*) => {
        alt_full_opt_slash!(__impl ($i, $($new)*), $($rest)*)
    };
    // Append a subsequent wrapped branch, separated from the previous ones by `|`.
    (__impl_push2 ($i:expr, $($result:tt)+) ($($new:tt)*), $($rest:tt)*) => {
        alt_full_opt_slash!(__impl ($i, $($result)+ | $($new)*), $($rest)*)
    };
    // Wrap one branch so that it must be followed by an optional "/" and the
    // end of the input.
    // NOTE(review): for branches of the form `parser => { gen }` the mapping
    // clause ends up inside `terminated!(...)`; confirm this expands as intended.
    (__impl_push ($($result:tt)*) ($($new:tt)*), $($rest:tt)*) => {
        // modify branch:
        alt_full_opt_slash!(__impl_push2 ($($result)*) (
            terminated!(
                $($new)*,
                tuple!(
                    alt_complete!(tag_s!("/") | value!("")), // ignore trailing "/"
                    eof!() // make sure full URL was parsed
                )
            )
        ), $($rest)*)
    };
    // Next branch is a plain parser identifier.
    (__impl ($($result:tt)*), $e:ident | $($rest:tt)*) => {
        alt_full_opt_slash!(__impl_push ($($result)*) ( $e ), $($rest)*)
    };
    // Next branch is a parser macro invocation.
    (__impl ($($result:tt)*), $subrule:ident!( $($args:tt)*) | $($rest:tt)*) => {
        alt_full_opt_slash!(__impl_push ($($result)*) ( $subrule!($($args)*) ), $($rest)*)
    };
    // Next branch is a parser macro invocation with a `=> { gen }` mapping.
    (__impl ($($result:tt)*), $subrule:ident!( $($args:tt)* ) => { $gen:expr } | $($rest:tt)*) => {
        alt_full_opt_slash!(__impl_push ($($result)*) ( $subrule!($($args)*) => { $gen } ), $($rest)*)
    };
    // Next branch is a parser identifier with a `=> { gen }` mapping.
    (__impl ($($result:tt)*), $e:ident => { $gen:expr } | $($rest:tt)*) => {
        alt_full_opt_slash!(__impl_push ($($result)*) ( $e => { $gen } ), $($rest)*)
    };
    // Sentinel reached: emit the accumulated branches.
    (__impl ($i:expr, $($result:tt)*), __end) => {
        alt_complete!($i, $($result)*)
    };
    // Public entry point: start with an empty accumulator and add the sentinel.
    ($i:expr, $($rest:tt)*) => {{
        alt_full_opt_slash!(__impl ($i, ), $($rest)* | __end)
    }};
}

// /api/v1/rooms/UUID/tracks/UUID
//
// "Flat" variant: every URL shape is spelled out as its own branch. Because
// `alt_full_opt_slash!` only accepts a branch that parses up to the end of the
// input (ignoring a trailing "/"), a shorter branch does not shadow a longer
// one that shares its prefix. The shared prefix (including the room UUID) may
// be parsed again for each branch that is tried.
named!(apiv1<&str, ApiRequest>, preceded!(tag_s!("/api/v1"),
    alt_full_opt_slash!(
        // /rooms
        do_parse!(
            tag_s!("/rooms") >>
            (ApiRequest::Rooms)
        )
        |
        // /rooms/UUID
        do_parse!(
            tag_s!("/rooms") >>
            room: uuid >>
            (ApiRequest::Room{room})
        )
        |
        // /rooms/UUID/tracks
        do_parse!(
            tag_s!("/rooms") >>
            room: uuid >>
            tag_s!("/tracks") >>
            (ApiRequest::Tracks{room})
        )
        |
        // /rooms/UUID/tracks/UUID
        do_parse!(
            tag_s!("/rooms") >>
            room: uuid >>
            tag_s!("/tracks") >>
            track: uuid >>
            (ApiRequest::Track{room, track})
        )
    )
));

// Top-level entry point for the "flat" variant. No trailing-slash or
// end-of-input handling appears here; `apiv1` uses `alt_full_opt_slash!`
// for its branches.
named!(api<&str, ApiRequest>, alt_complete!(
    apiv1
    // | ... other versions
    // also could wrap in new enum like:
    //     apiv1 => { ApiRequest::V1 }
    //     |
    //     apiv2 => { ApiRequest::V2 }
));
于 2018-02-14T22:42:51.407 回答