0

这是《Parsing single-quoted string with escaped quotes with Nom 5》和《Parse string with escaped single quotes》这两个问题的变体。我想将 '1 \' 2 \ 3 \\ 4'(原始字符序列)之类的字符串解析为 "1 \\' 2 \\ 3 \\\\ 4"(Rust 字符串),所以除了字符串内部可能出现 \' 之外,我不关心任何转义。以下是使用链接问题中的代码所做的尝试:

use nom::{
  branch::alt,
  bytes::complete::{escaped, tag},
  character::complete::none_of,
  combinator::recognize,
  multi::{many0, separated_list0},
  sequence::delimited,
  IResult,
};

/// First attempt: parses a single-quoted string using nom's `escaped`.
///
/// The body between the quotes is either an `escaped` run (normal characters
/// are anything except `\` and `'`, the control character is `\`, and the only
/// escapable token is `'`) or, if that alternative fails, the empty string.
///
/// According to the output listed after `main`, this returns a `Tag` error
/// for the sample inputs that contain a backslash not followed by `'`
/// (`\ ` and `\\`).
fn parse_quoted_1(input: &str) -> IResult<&str, &str> {
  delimited(
    tag("'"),
    // `tag("")` is the fallback alternative: it matches without consuming input.
    alt((escaped(none_of("\\\'"), '\\', tag("'")), tag(""))),
    tag("'"),
  )(input)
}

/// Second attempt: recognizes runs of non-quote characters separated by `\'`.
///
/// According to the output listed after `main`, for the sample inputs that
/// contain `\'` this stops at the escaped quote: it returns `1 \` and leaves
/// everything after that quote unparsed.
fn parse_quoted_2(input: &str) -> IResult<&str, &str> {
  delimited(
    tag("'"),
    // NOTE(review): `none_of("'")` also accepts `\`, so the backslash appears to be
    // consumed by `many0` before the `\'` separator gets a chance to match.
    recognize(separated_list0(tag("\\'"), many0(none_of("'")))),
    tag("'"),
  )(input)
}

/// Runs both parser attempts on each sample input and prints the debug output.
fn main() {
  // Raw sample inputs, from the plainest to the most backslash-heavy.
  let samples = [
    r#"'1'"#,
    r#"'1 \' 2'"#,
    r#"'1 \' 2 \ 3'"#,
    r#"'1 \' 2 \ 3 \\ 4'"#,
  ];
  // For every sample, the first attempt is printed before the second one.
  for sample in samples.iter().copied() {
    println!("{:?}", parse_quoted_1(sample));
    println!("{:?}", parse_quoted_2(sample));
  }
}

/*
Ok(("", "1"))
Ok(("", "1"))
Ok(("", "1 \\' 2"))
Ok((" 2'", "1 \\"))
Err(Error(Error { input: "1 \\' 2 \\ 3'", code: Tag }))
Ok((" 2 \\ 3'", "1 \\"))
Err(Error(Error { input: "1 \\' 2 \\ 3 \\\\ 4'", code: Tag }))
Ok((" 2 \\ 3 \\\\ 4'", "1 \\"))
*/

只有前 3 个案例按预期工作。

4

2 回答 2

0

一个不太优雅的、命令式的解决方案:

use nom::{bytes::complete::take, character::complete::char, sequence::delimited, IResult};

/// Parses a single-quoted string in which `\'` does not terminate the string.
///
/// Returns the text between the quotes unchanged (backslashes are kept).
fn parse_quoted(input: &str) -> IResult<&str, &str> {
  /// Takes every character up to the first `'` that is not directly preceded
  /// by a backslash, or the whole input when there is no such quote.
  fn escaped(input: &str) -> IResult<&str, &str> {
    let mut previous = '\0';
    // Counted in characters, not bytes.
    let mut length: usize = 0;
    for current in input.chars() {
      let is_closing_quote = current == '\'' && previous != '\\';
      if is_closing_quote {
        break;
      }
      previous = current;
      length += 1;
    }
    take(length)(input)
  }
  delimited(char('\''), escaped, char('\''))(input)
}

fn main() {
  println!("{:?}", parse_quoted(r#"'' ..."#));
  println!("{:?}", parse_quoted(r#"'1' ..."#));
  println!("{:?}", parse_quoted(r#"'1 \' 2' ..."#));
  println!("{:?}", parse_quoted(r#"'1 \' 2 \ 3' ..."#));
  println!("{:?}", parse_quoted(r#"'1 \' 2 \ 3 \\ 4' ..."#));
}

/*
Ok((" ...", ""))
Ok((" ...", "1"))
Ok((" ...", "1 \\' 2"))
Ok((" ...", "1 \\' 2 \\ 3"))
Ok((" ...", "1 \\' 2 \\ 3 \\\\ 4"))
*/

为了允许 '...\\' 这样的输入(即结尾引号前是一个被转义的反斜杠),我们可以类似地多记录几个前面的字符:

    // Keep the three characters preceding the current one, so that a quote
    // following an escaped backslash (`\\'`) can be told apart from an
    // escaped quote (`\'`).
    let mut pc = 0 as char; // previous character
    let mut ppc = 0 as char; // character two positions back
    let mut pppc = 0 as char; // character three positions back
    let mut n = 0; // number of characters accepted so far
    for (i, c) in input.chars().enumerate() {
      // Stop at a quote that is either not preceded by a backslash, or is
      // preceded by exactly two backslashes.
      // NOTE(review): only three characters of history are kept, so a quote
      // after a longer even run of backslashes (four, six, ...) is still not
      // treated as the closing quote.
      if (c == '\'' && pc != '\\') || (c == '\'' && pc == '\\' && ppc == '\\' && pppc != '\\') {
        break;
      }
      // Shift the history window by one character.
      pppc = ppc;
      ppc = pc;
      pc = c;
      n = i + 1;
    }
于 2021-06-20T19:28:09.870 回答
0

这是我解析带引号字符串的方法。

它返回 Cow 类型:当字符串中没有需要转义的字符时,返回对原始字符串的引用;否则返回一份去掉了转义反斜杠的字符串副本。

您可能需要根据自己的需要调整 is_qdtext 和 is_quoted_char。

/// Returns `true` if `chr` may appear inside a single-quoted string without
/// being escaped.
///
/// Accepted characters are horizontal tab, printable ASCII (space through
/// `~`) other than the delimiter `'` and the escape character `\`, and every
/// non-ASCII character. ASCII control characters are rejected.
fn is_qdtext(chr: char) -> bool {
    match chr {
        // The delimiter and the escape character always need escaping; if the
        // delimiter were accepted here, the content parser would swallow the
        // closing quote.
        '\'' | '\\' => false,
        '\t' | ' '..='~' => true,
        // Test the whole code point: casting to `u8` would keep only the low
        // byte and misclassify characters such as U+4E2D.
        _ => !chr.is_ascii(),
    }
}

/// Returns `true` if `chr` is allowed to follow a backslash in an escape
/// sequence.
///
/// Accepted characters are horizontal tab, printable ASCII (space through
/// `~`) and every non-ASCII character. Other ASCII control characters are
/// rejected.
fn is_quoted_char(chr: char) -> bool {
    // Test the whole code point: casting to `u8` would keep only the low byte
    // and misclassify characters such as U+4E2D.
    matches!(chr, '\t' | ' '..='~') || !chr.is_ascii()
}

/// Parses one escape sequence: a backslash followed by a single character
/// accepted by `is_quoted_char`.
///
/// Returns the character that follows the backslash.
fn parse_quoted_pair(data: &str) -> IResult<&str, char> {
    let (after_backslash, _backslash) = tag("\\")(data)?;
    satisfy(is_quoted_char)(after_backslash)
}

// parse content of quoted string
fn parse_quoted_content(data: &str) -> IResult<&str, Cow<'_, str>> {
    let (mut data, content) = data.split_at_position_complete(|item| !is_qdtext(item))?;

    if data.chars().next() == Some('\\') {
        // we need to escape some characters
        let mut content = content.to_string();
        while data.chars().next() == Some('\\') {
            // unescape next char
            let (next_data, chr) = parse_quoted_pair(data)?;
            content.push(chr);
            data = next_data;

            // parse next plain text chunk
            let (next_data, extra_content) =
                data.split_at_position_complete(|item| !is_qdtext(item))?;
            content.push_str(extra_content);
            data = next_data;
        }
        Ok((data, Cow::Owned(content)))
    } else {
        // quick version, there is no characters to escape
        Ok((data, Cow::Borrowed(content)))
    }
}

fn parse_quoted_string(data: &str) -> IResult<&str, Cow<'_, str>> {
    let (data, (_, content, _)) = tuple((tag("'"), parse_quoted_content, tag("'")))(data)?;

    Ok((data, content))
}
于 2021-08-13T14:43:37.233 回答