另一种选择是使用一种经过修改的 Lexer(词法分析器):把文本中需要进行特定替换的每个离散区域隔离出来,并将该区域标记为已使用,这样后续的替换就不会再作用于它。
下面是实现这一做法的示例:
首先,我们创建一个类,用来表示某段字符串是否已被使用(即是否已经被替换过):
/// <summary>
/// A piece of text together with a flag telling whether it has already been
/// consumed by a replacement and must therefore be left alone.
/// </summary>
public class UsageIndicator
{
    private readonly string _value;
    private readonly bool _isUsed;

    /// <summary>Creates an indicator for the given text.</summary>
    /// <param name="value">The text this region holds.</param>
    /// <param name="isUsed">Whether the text has already been claimed by a replacement.</param>
    public UsageIndicator(string value, bool isUsed)
    {
        _value = value;
        _isUsed = isUsed;
    }

    /// <summary>The text held by this region.</summary>
    public string Value
    {
        get { return _value; }
    }

    /// <summary>True when this region has already been claimed by a replacement.</summary>
    public bool IsUsed
    {
        get { return _isUsed; }
    }

    /// <summary>Returns the raw text, so a sequence of regions can be joined back into a string.</summary>
    public override string ToString()
    {
        return _value;
    }
}
然后我们将定义一个类,它既代表如何在文本中定位“标记”,又代表找到它时要做什么
/// <summary>
/// Describes both how to locate a "token" in text (a regular expression) and
/// what to do with it once found (a function producing the replacement text).
/// </summary>
public class TokenOperation
{
    /// <summary>Regular expression used to locate tokens.</summary>
    public Regex Pattern { get; private set; }

    /// <summary>Receives the matched text and returns the text that replaces it.</summary>
    public Func<string, string> Mutator { get; private set; }

    /// <summary>Creates an operation from a regex pattern and a replacement function.</summary>
    /// <param name="pattern">Regular expression source text.</param>
    /// <param name="mutator">Function applied to every matched token.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="pattern"/> or <paramref name="mutator"/> is null.
    /// </exception>
    public TokenOperation(string pattern, Func<string, string> mutator)
    {
        Pattern = new Regex(pattern);
        if (mutator == null)
        {
            // Fail at registration time instead of with a NullReferenceException
            // on the first successful match.
            throw new ArgumentNullException("mutator");
        }
        Mutator = mutator;
    }

    /// <summary>
    /// Splits <paramref name="source"/> around one match into up to three regions:
    /// the unused text before the match, the mutated match (marked used), and the
    /// unused text after the match. Empty head/tail regions are omitted.
    /// </summary>
    /// <param name="source">Text of the region being split.</param>
    /// <param name="index">Start offset of the match within <paramref name="source"/>.</param>
    /// <param name="length">Length of the match.</param>
    /// <param name="matchedIndex">
    /// Position of the used (mutated) region inside the returned list: 1 when a
    /// head region precedes it, otherwise 0.
    /// </param>
    /// <returns>The regions that replace the original one, in text order.</returns>
    private List<UsageIndicator> ExtractRegions(string source, int index, int length, out int matchedIndex)
    {
        var result = new List<UsageIndicator>(3);
        matchedIndex = 0;

        var head = source.Substring(0, index);
        if (head.Length > 0)
        {
            result.Add(new UsageIndicator(head, false));
            matchedIndex = 1;
        }

        var body = Mutator(source.Substring(index, length));
        result.Add(new UsageIndicator(body, true));

        var tail = source.Substring(index + length);
        if (tail.Length > 0)
        {
            result.Add(new UsageIndicator(tail, false));
        }

        return result;
    }

    /// <summary>
    /// Applies this operation to every unused region of <paramref name="source"/>,
    /// in place. Each match replaces its region with head / mutated-match / tail
    /// regions; the mutated match is marked used so later operations skip it.
    /// </summary>
    /// <remarks>
    /// Every unused region is matched as an independent string, so anchors and
    /// look-arounds in <see cref="Pattern"/> only see that region's text.
    /// </remarks>
    /// <param name="source">The region list to rewrite; it is modified in place.</param>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public void Match(List<UsageIndicator> source)
    {
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }

        for (var i = 0; i < source.Count; ++i)
        {
            var value = source[i];
            if (value.IsUsed)
            {
                continue;
            }

            // Zero-length matches are skipped: they carry no text to replace, and
            // splitting on one consumes no input, so the scan would never finish.
            var match = Pattern.Match(value.Value);
            while (match.Success && match.Length == 0)
            {
                match = match.NextMatch();
            }
            if (!match.Success)
            {
                continue;
            }

            int modifyIBy;
            var regions = ExtractRegions(value.Value, match.Index, match.Length, out modifyIBy);
            source.RemoveAt(i);
            source.InsertRange(i, regions);

            // Move i onto the used region; the loop's ++i then lands on the tail
            // (if any), so the remaining text is scanned for further matches.
            i += modifyIBy;
        }
    }
}
处理完这些事情,把东西放在一起做替换就很简单了
/// <summary>
/// Holds an ordered list of token operations and applies them to a string.
/// Operations run in the order they were added, and text claimed by an earlier
/// operation is not visible to later ones, so registration order matters.
/// </summary>
public class Rewriter
{
    private readonly List<TokenOperation> _definitions = new List<TokenOperation>();

    /// <summary>Registers a regex-based rule.</summary>
    /// <param name="pattern">Regular expression that locates the token.</param>
    /// <param name="mutator">Function that turns the matched text into its replacement.</param>
    public void AddPattern(string pattern, Func<string, string> mutator)
    {
        _definitions.Add(new TokenOperation(pattern, mutator));
    }

    /// <summary>Registers a rule that replaces a literal string with a fixed replacement.</summary>
    /// <param name="pattern">Literal text to look for; regex metacharacters are escaped.</param>
    /// <param name="replacement">Text that replaces every occurrence.</param>
    /// <exception cref="ArgumentNullException"><paramref name="pattern"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="pattern"/> is empty.</exception>
    public void AddLiteral(string pattern, string replacement)
    {
        if (pattern == null)
        {
            throw new ArgumentNullException("pattern");
        }
        if (pattern.Length == 0)
        {
            // An empty literal would compile to a regex that matches the empty
            // string at every position; it can never identify a token, so it is
            // rejected up front.
            throw new ArgumentException("Literal pattern must not be empty.", "pattern");
        }

        AddPattern(Regex.Escape(pattern), x => replacement);
    }

    /// <summary>Runs every registered rule over <paramref name="value"/>, in registration order.</summary>
    /// <param name="value">The text to rewrite.</param>
    /// <returns>The text with all replacements applied.</returns>
    public string Rewrite(string value)
    {
        // Start with the whole input as a single unused region; each rule splits
        // unused regions further and marks what it replaced as used.
        var workingValue = new List<UsageIndicator> { new UsageIndicator(value, false) };
        foreach (var definition in _definitions)
        {
            definition.Match(workingValue);
        }

        // The regions are concatenated through their ToString(), in text order.
        return string.Join("", workingValue);
    }
}
在演示代码(如下)中,请记住添加模式或文字表达式的顺序很重要。先添加的规则先被标记化,因此,为了防止 url 中的 `://` 部分被当作“一个表情符号加一个斜线”来处理,
我们首先处理图像块:它包含标签之间的 url,并且会在表情规则尝试匹配之前就被标记为已使用。
/// <summary>Console demo: rewrites a sample string containing an image tag and an emoticon.</summary>
class Program
{
    /// <summary>Turns an <c>[img]…[/img]</c> block into an HTML image tag.</summary>
    private static string ToImageTag(string block)
    {
        var opened = block.Replace("[img]", "<img src=\"");
        return opened.Replace("[/img]", "\"/>");
    }

    static void Main(string[] args)
    {
        const string input = "Stacks be [img]http://example.com/overflowing.png[/img] :/";

        var markup = new Rewriter();

        // The image rule is registered first so the URL inside the tags is
        // claimed before the ":/" emoticon rule gets a chance to see it.
        markup.AddPattern(@"\[img\].*?\[/img\]", ToImageTag);
        markup.AddLiteral(":/", "<img src=\"emote-sigh.png\"/>");
        markup.AddLiteral(":(", "<img src=\"emote-frown.png\"/>");
        markup.AddLiteral(":P", "<img src=\"emote-tongue.png\"/>");

        var output = markup.Rewrite(input);
        Console.WriteLine(output);
    }
}
示例输出:
Stacks be <img src="http://example.com/overflowing.png"/> <img src="emote-sigh.png"/>