0

我正在尝试编写一个电子邮件主题行解析器，用户可以为它定义自己的解析规则。规则会匹配主题行中的成员名称，随后用该名称进行查找。问题在于，成员名称本身可能包含解析规则所用的分隔符。

// This rule states that any text between two '>' characters is a member name.
// Note that the user can make up any parsing rule, so this is just an example.
string sampleRule = ">{member}>";

// Rule-parsing code omitted. At this point we have figured out that we are
// looking for {member} and which prefix/postfix delimiters surround it.

string prefix = ">";
string postfix = ">";

// Note that member>Name3 is a valid member name.
string subject = "Subject>memberName1>memberName2>member>Name3>EndSubject";

// The delimiters come from a user-defined rule, so they are escaped before being
// embedded in the pattern; otherwise a delimiter such as "(" or "." would be
// interpreted as regex syntax instead of literal text.
// The whole pattern is a zero-width lookahead, so a match consumes no input and
// the postfix of one candidate can also serve as the prefix of the next one.
string pattern = "(?=" + Regex.Escape(prefix) + "([a-z].+?)" + Regex.Escape(postfix) + ")";

Match m = Regex.Match(subject, pattern);

while (m.Success) {
    // Group 1 holds the text between the delimiters: a possible member name.
    Console.WriteLine(m.Groups[1].Value);
    m = m.NextMatch();
}

// The output needs to be:
// memberName1
// memberName2
// member>Name3

// It is currently:
// memberName1
// memberName2
// member

// Note that spanning bad matches are OK, for example
// memberName1>memberName2 or memberName1>memberName2>member>Name3
4

1 回答 1

0

这是使用正则表达式和递归的脆弱尝试:

/// <summary>
/// Demonstrates a (fragile) regex-plus-recursion search for candidate member
/// names in an e-mail subject line whose member names may themselves contain
/// the rule's delimiter.
/// </summary>
static class Program
{
    /// <summary>
    /// Runs <see cref="Find"/> on the sample subject and prints every candidate.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        string prefix = ">";
        string suffix = ">";
        string subject =
            "Subject>memberName1>memberName2>member>Name3>EndSubject";

        foreach (string item in Find(subject, true, prefix, suffix))
        {
            Console.WriteLine(item);
        }
        /* The output is:
        memberName1>memberName2
        member>Name3                *match
        memberName1                 *match
        memberName2                 *match
        member
        Name3
         */
    }

    /// <summary>
    /// Collects candidate member names from <paramref name="subject"/> by
    /// alternating between two patterns on each recursion level.
    /// </summary>
    /// <param name="subject">Text to search.</param>
    /// <param name="toggle">
    /// <c>true</c> to look for "word, one delimiter, word" spans that are preceded
    /// by the prefix and followed by the suffix; <c>false</c> to break the text
    /// into runs of word characters.
    /// </param>
    /// <param name="prefix">Literal delimiter that precedes a member name.</param>
    /// <param name="suffix">Literal delimiter that follows a member name.</param>
    /// <returns>
    /// The distinct, non-empty matches found at this level followed by those found
    /// recursively inside each match, in order of first appearance.
    /// </returns>
    private static IEnumerable<string> Find(
        string subject,
        bool toggle,
        string prefix,
        string suffix)
    {
        // Delimiters are user-defined text, so escape them before building the
        // pattern; an unescaped "(" or "." would otherwise change its meaning
        // or make it fail to parse.
        string pre = Regex.Escape(prefix);
        string suf = Regex.Escape(suffix);

        // spanPattern: after a prefix, an atomic group of word characters, exactly
        // one delimiter and more word characters, with a suffix right behind it.
        // wordPattern: any run of word characters (may match the empty string;
        // empty matches are filtered out below).
        string spanPattern =
            @"(?<=" + pre + @")(?>([\w]*(" + pre + "|" + suf + @")[\w]*))(?=" + suf + ")";
        string wordPattern = @"[\w]*";

        List<Match> matches = Regex
            .Matches(subject, toggle ? spanPattern : wordPattern)
            .Cast<Match>()
            .ToList();

        // Emit this level's matches once, then descend into each match with the
        // other pattern. (Re-emitting the whole list per match would only create
        // duplicates for Distinct to throw away.)
        return matches
            .Select(m => m.Value)
            .Concat(matches.SelectMany(m => Find(m.Value, !toggle, prefix, suffix)))
            .Where(s => !String.IsNullOrEmpty(s))
            .Distinct();
    }
}

注意：我不确定在您的示例中，`member>Name3` 中的 `>` 应被视为前缀还是后缀。

[编辑] 这是另一种不使用正则表达式的方法。它考虑到 `member>Name3` 中的 `>` 既可能是前缀，也可能是后缀：

// Regex-free candidate search: every inner token of the subject, plus every pair
// of neighbouring inner tokens re-joined with each delimiter (to cover member
// names that themselves contain a delimiter).
// Expects `prefix`, `suffix` and `subject` to be declared by the surrounding code.
var separators = new[] { prefix, suffix };

// Split once. The first and last tokens are treated as text outside the
// delimiters and dropped.
var tokens = subject.Split(separators, StringSplitOptions.RemoveEmptyEntries);
var innerTokens = tokens
    .Skip(1)
    .Take(tokens.Length - 2) // a non-positive count yields an empty sequence
    .ToList();

// Single-token candidates, de-duplicated in order of first appearance.
var firstResult = innerTokens
    .Distinct()
    .ToList();

// Pair neighbours from the un-deduplicated list: de-duplicating first would
// make tokens look adjacent that never were (and lose pairs that really occur)
// whenever a token repeats. Distinct() on the separators avoids building the
// same joined string twice when prefix and suffix are equal.
var result = innerTokens
    .Zip(innerTokens.Skip(1), (a, b) => separators
        .Distinct()
        .Select(s => String.Format("{0}{1}{2}", a, s, b)))
    .SelectMany(joined => joined)
    .Union(firstResult)
    .ToList();
于 2013-08-01T12:24:48.987 回答