感谢大家的建议。我写了一个简单粗糙的基准测试来比较各位提出的方案:每一轮对 4 个字符串各执行 500,000 次替换,共运行 4 轮。结果如下:
*** 通过 1
旧的 (Chained String.Replace()) 方式在 814 毫秒内完成
logicnp (ToCharArray) 方式在 916 毫秒内完成
oleksii (StringBuilder) 方式在 943 毫秒内完成
André Christoffer Andersen (Lambda w/ Aggregate) 方式在 2551 毫秒内完成
Richard(带 MatchEvaluator 的正则表达式)方式在 215 毫秒内完成
Marc Gravell(静态正则表达式)方式在 1008 毫秒内完成
*** 通过 2
旧的(Chained String.Replace())方式在 786 毫秒内完成
logicnp (ToCharArray) 方式在 920 毫秒内完成
oleksii (StringBuilder) 方式在 905 毫秒内完成
André Christoffer Andersen(Lambda w/Aggregate)方式在 2515 毫秒内完成
Richard(带 MatchEvaluator 的正则表达式)方式在 217 毫秒内完成
Marc Gravell(静态正则表达式)方式在 1025 毫秒内完成
*** 通过 3
旧的(Chained String.Replace())方式在 775 毫秒内完成
logicnp (ToCharArray) 方式在 903 毫秒内完成
oleksii (StringBuilder) 方式在 931 毫秒内完成
André Christoffer Andersen (Lambda w/ Aggregate) 方式在 2529 毫秒内完成
Richard(带 MatchEvaluator 的正则表达式)方式在 214 毫秒内完成
Marc Gravell(静态正则表达式)方式在 1022 毫秒内完成
*** 通过 4
旧的(Chained String.Replace())方式在 799 毫秒内完成
logicnp (ToCharArray) 方式在 908 毫秒内完成
oleksii (StringBuilder) 方式在 938 毫秒内完成
André Christoffer Andersen(Lambda w/Aggregate)方式在 2592 毫秒内完成
Richard(带 MatchEvaluator 的正则表达式)方式在 225 毫秒内完成
Marc Gravell(静态正则表达式)方式在 1050 毫秒内完成
此基准测试的代码如下。请审阅代码,并确认 @Richard 的方法确实是最快的。请注意,我没有检查各方法的输出是否正确,只是假定它们是正确的。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
using System.Text.RegularExpressions;
namespace StringReplaceTest
{
class Program
{
// Benchmark inputs: each string contains at least one character that has to be replaced.
private static string test1 = "A^@[BCD";
private static string test2 = "E]FGH\\";
private static string test3 = "ijk`l}m";
private static string test4 = "nopq~{r";

// Character -> replacement text table used by the static-regex strategy (see Sanitize).
private static readonly Dictionary<char, string> repl = new Dictionary<char, string>
{
    { '^', "Č" },
    { '@', "Ž" },
    { '[', "Š" },
    { ']', "Ć" },
    { '`', "ž" },
    { '}', "ć" },
    { '~', "č" },
    { '{', "š" },
    { '\\', "Đ" }
};

// Assigned exactly once by the static constructor.
private static readonly Regex replaceRegex;
/// <summary>
/// Static initializer: builds the compiled regex <c>replaceRegex</c> as one character class
/// containing every key of <c>repl</c>.
/// </summary>
static Program()
{
    // Each key is emitted as a \uXXXX escape instead of going through Regex.Escape.
    // Regex.Escape does not escape ']', so the ']' key used to close the character class
    // early; the rest of the keys then became a literal sequence and the pattern never
    // matched a single replaceable character.
    StringBuilder pattern = new StringBuilder("[");
    foreach (char key in repl.Keys)
    {
        pattern.Append("\\u").Append(((int)key).ToString("X4"));
    }
    pattern.Append(']');
    replaceRegex = new Regex(pattern.ToString(), RegexOptions.Compiled);
}
/// <summary>
/// Replaces every match of <c>replaceRegex</c> in <paramref name="input"/> with the
/// <c>repl</c> entry keyed by the first character of the match.
/// </summary>
/// <param name="input">Text to process.</param>
/// <returns>The text with all matches substituted.</returns>
public static string Sanitize(string input)
{
    MatchEvaluator lookUpReplacement = hit => repl[hit.Value[0]];
    return replaceRegex.Replace(input, lookUpReplacement);
}
/// <summary>
/// Copies <paramref name="input"/> into a StringBuilder and applies the nine fixed
/// character-for-character substitutions in place.
/// </summary>
/// <param name="input">Text to process.</param>
/// <returns>A new string with the substitutions applied.</returns>
static string DoGeneralReplace(string input)
{
    StringBuilder buffer = new StringBuilder(input);
    buffer.Replace('^', 'Č');
    buffer.Replace('@', 'Ž');
    buffer.Replace('[', 'Š');
    buffer.Replace(']', 'Ć');
    buffer.Replace('`', 'ž');
    buffer.Replace('}', 'ć');
    buffer.Replace('~', 'č');
    buffer.Replace('{', 'š');
    buffer.Replace('\\', 'Đ');
    return buffer.ToString();
}
/// <summary>
/// Applies a character-to-character mapping to a string, one mapped character at a time,
/// in the order the map enumerates its keys.
/// </summary>
/// <param name="input">Text to process.</param>
/// <param name="replacementMap">Maps each character to replace onto its substitute.</param>
/// <returns>The text after every mapping has been applied in turn.</returns>
static string Replace(string input, IDictionary<char, char> replacementMap)
{
    string current = input;
    foreach (char oldChar in replacementMap.Keys)
    {
        // Each step works on the output of the previous one.
        current = current.Replace(oldChar, replacementMap[oldChar]);
    }
    return current;
}
/// <summary>
/// Entry point: runs the benchmark four times, numbering the passes 1 through 4.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    const int passCount = 4;
    int pass = 1;
    while (pass <= passCount)
    {
        DoIt(pass);
        pass++;
    }
}
/// <summary>
/// Runs one benchmark pass. Each replacement strategy processes the four test strings
/// 500,000 times; the elapsed milliseconds per strategy are written to the console.
/// The replaced strings themselves are discarded, so only timing is reported.
/// </summary>
/// <param name="n">Pass number; only used in the "*** Pass" header line.</param>
static void DoIt(int n)
{
    const int Iterations = 500000;
    Stopwatch sw = new Stopwatch();
    Console.WriteLine("*** Pass " + n.ToString());

    // old way
    sw.Start();
    for (int idx = 0; idx < Iterations; idx++)
    {
        string result1 = test1.Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š').Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć').Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ');
        string result2 = test2.Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š').Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć').Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ');
        string result3 = test3.Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š').Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć').Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ');
        string result4 = test4.Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š').Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć').Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ');
    }
    sw.Stop();
    Console.WriteLine("Old (Chained String.Replace()) way completed in " + sw.ElapsedMilliseconds.ToString() + " ms");

    // Lookup table shared by the logicnp and Aggregate strategies; built while the timer is stopped.
    Dictionary<char, char> replacements = new Dictionary<char, char>();
    replacements.Add('^', 'Č');
    replacements.Add('@', 'Ž');
    replacements.Add('[', 'Š');
    replacements.Add(']', 'Ć');
    replacements.Add('`', 'ž');
    replacements.Add('}', 'ć');
    replacements.Add('~', 'č');
    replacements.Add('{', 'š');
    replacements.Add('\\', 'Đ');

    // logicnp way: copy to a char array and overwrite mapped characters in place
    sw.Reset();
    sw.Start();
    for (int idx = 0; idx < Iterations; idx++)
    {
        char[] charArray1 = test1.ToCharArray();
        for (int i = 0; i < charArray1.Length; i++)
        {
            char newChar;
            if (replacements.TryGetValue(test1[i], out newChar))
                charArray1[i] = newChar;
        }
        string result1 = new string(charArray1);
        char[] charArray2 = test2.ToCharArray();
        for (int i = 0; i < charArray2.Length; i++)
        {
            char newChar;
            if (replacements.TryGetValue(test2[i], out newChar))
                charArray2[i] = newChar;
        }
        string result2 = new string(charArray2);
        char[] charArray3 = test3.ToCharArray();
        for (int i = 0; i < charArray3.Length; i++)
        {
            char newChar;
            if (replacements.TryGetValue(test3[i], out newChar))
                charArray3[i] = newChar;
        }
        string result3 = new string(charArray3);
        char[] charArray4 = test4.ToCharArray();
        for (int i = 0; i < charArray4.Length; i++)
        {
            char newChar;
            if (replacements.TryGetValue(test4[i], out newChar))
                charArray4[i] = newChar;
        }
        string result4 = new string(charArray4);
    }
    sw.Stop();
    Console.WriteLine("logicnp (ToCharArray) way completed in " + sw.ElapsedMilliseconds.ToString() + " ms");

    // oleksii way
    sw.Reset();
    sw.Start();
    for (int idx = 0; idx < Iterations; idx++)
    {
        string result1 = DoGeneralReplace(test1);
        string result2 = DoGeneralReplace(test2);
        string result3 = DoGeneralReplace(test3);
        string result4 = DoGeneralReplace(test4);
    }
    sw.Stop();
    Console.WriteLine("oleksii (StringBuilder) way completed in " + sw.ElapsedMilliseconds.ToString() + " ms");

    // André Christoffer Andersen way
    sw.Reset();
    sw.Start();
    for (int idx = 0; idx < Iterations; idx++)
    {
        string result1 = Replace(test1, replacements);
        string result2 = Replace(test2, replacements);
        string result3 = Replace(test3, replacements);
        string result4 = Replace(test4, replacements);
    }
    sw.Stop();
    Console.WriteLine("André Christoffer Andersen (Lambda w/ Aggregate) way completed in " + sw.ElapsedMilliseconds.ToString() + " ms");

    // Richard way
    // The regex and evaluator are one-time setup, so they are created before the timer
    // starts; the other strategies' setup is likewise excluded from their timings.
    Regex reg = new Regex(@"\^|@|\[|\]|`|\}|~|\{|\\");
    MatchEvaluator eval = match =>
    {
        switch (match.Value)
        {
            case "^": return "Č";
            case "@": return "Ž";
            case "[": return "Š";
            case "]": return "Ć";
            case "`": return "ž";
            case "}": return "ć";
            case "~": return "č";
            case "{": return "š";
            case "\\": return "Đ";
            default: throw new Exception("Unexpected match!");
        }
    };
    sw.Reset();
    sw.Start();
    for (int idx = 0; idx < Iterations; idx++)
    {
        string result1 = reg.Replace(test1, eval);
        string result2 = reg.Replace(test2, eval);
        string result3 = reg.Replace(test3, eval);
        string result4 = reg.Replace(test4, eval);
    }
    sw.Stop();
    Console.WriteLine("Richard (Regex w/ MatchEvaluator) way completed in " + sw.ElapsedMilliseconds.ToString() + " ms");

    // Marc Gravell way
    sw.Reset();
    sw.Start();
    for (int idx = 0; idx < Iterations; idx++)
    {
        string result1 = Sanitize(test1);
        string result2 = Sanitize(test2);
        string result3 = Sanitize(test3);
        string result4 = Sanitize(test4);
    }
    sw.Stop();
    Console.WriteLine("Marc Gravell (Static Regex) way completed in " + sw.ElapsedMilliseconds.ToString() + " ms\n");
}
}
}
2020 年 6 月更新
由于这个问答仍然很受关注,我想把 user1664043 补充的方案(StringBuilder 配合 IndexOfAny)也加入基准测试。这次使用 .NET Core 3.1 编译,结果如下:
*** 通过 1
旧的 (Chained String.Replace()) 方式在 199 毫秒内完成
logicnp (ToCharArray) 方式在 296 毫秒内完成
oleksii (StringBuilder) 方式在 416 毫秒内完成
André Christoffer Andersen(Lambda w/Aggregate)方式在 870 毫秒内完成
Richard (Regex w/ MatchEvaluator) 方式在 1722 毫秒内完成
Marc Gravell(静态正则表达式)方式在 395 毫秒内完成
user1664043 (StringBuilder w/IndexOfAny) 方式在 459 毫秒内完成
*** 通过 2
旧的(Chained String.Replace())方式在 215 毫秒内完成
logicnp (ToCharArray) 方式在 239 毫秒内完成
oleksii (StringBuilder) 方式在 341 毫秒内完成
André Christoffer Andersen(Lambda w/Aggregate)方式在 758 毫秒内完成
Richard (Regex w/ MatchEvaluator) 方式在 1591 毫秒内完成
Marc Gravell(静态正则表达式)方式在 354 毫秒内完成
user1664043 (StringBuilder w/IndexOfAny) 方式在 426 毫秒内完成
*** 通过 3
旧的 (Chained String.Replace()) 方式在 199 毫秒内完成
logicnp (ToCharArray) 方式在 265 毫秒内完成
oleksii (StringBuilder) 方式在 337 毫秒内完成
André Christoffer Andersen(Lambda w/Aggregate)方式在 817 毫秒内完成
Richard (Regex w/ MatchEvaluator) 方式在 1666 毫秒内完成
Marc Gravell(静态正则表达式)方式在 373 毫秒内完成
user1664043 (StringBuilder w/IndexOfAny) 方式在 412 毫秒内完成
*** 通过 4
旧的 (Chained String.Replace()) 方式在 199 毫秒内完成
logicnp (ToCharArray) 方式在 230 毫秒内完成
oleksii (StringBuilder) 方式在 324 毫秒内完成
André Christoffer Andersen(Lambda w/Aggregate)方式在 791 毫秒内完成
Richard (Regex w/ MatchEvaluator) 方式在 1699 毫秒内完成
Marc Gravell(静态正则表达式)方式在 359 毫秒内完成
user1664043 (StringBuilder w/IndexOfAny) 方式在 413 毫秒内完成
更新后的代码如下:
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace Test.StringReplace
{
class Program
{
// Benchmark inputs: each string contains at least one character that has to be replaced.
private static string test1 = "A^@[BCD";
private static string test2 = "E]FGH\\";
private static string test3 = "ijk`l}m";
private static string test4 = "nopq~{r";

// Character -> replacement text table used by the static-regex strategy (see Sanitize).
private static readonly Dictionary<char, string> repl = new Dictionary<char, string>
{
    { '^', "Č" },
    { '@', "Ž" },
    { '[', "Š" },
    { ']', "Ć" },
    { '`', "ž" },
    { '}', "ć" },
    { '~', "č" },
    { '{', "š" },
    { '\\', "Đ" }
};

// Assigned exactly once by the static constructor.
private static readonly Regex replaceRegex;

// Parallel arrays for the IndexOfAny strategy: replacementChars[i] is the substitute for badChars[i].
private static readonly char[] badChars = new char[]
{
    '^', '@', '[', ']', '`', '}', '~', '{', '\\'
};
private static readonly char[] replacementChars = new char[]
{
    'Č', 'Ž', 'Š', 'Ć', 'ž', 'ć', 'č', 'š', 'Đ'
};
/// <summary>
/// Static initializer: builds the compiled regex <c>replaceRegex</c> as one character class
/// containing every key of <c>repl</c>.
/// </summary>
static Program()
{
    // Each key is emitted as a \uXXXX escape instead of going through Regex.Escape.
    // Regex.Escape does not escape ']', so the ']' key used to close the character class
    // early; the rest of the keys then became a literal sequence and the pattern never
    // matched a single replaceable character.
    StringBuilder pattern = new StringBuilder("[");
    foreach (char key in repl.Keys)
    {
        pattern.Append("\\u").Append(((int)key).ToString("X4"));
    }
    pattern.Append(']');
    replaceRegex = new Regex(pattern.ToString(), RegexOptions.Compiled);
}
/// <summary>
/// Replaces every match of <c>replaceRegex</c> in <paramref name="input"/> with the
/// <c>repl</c> entry keyed by the first character of the match.
/// </summary>
/// <param name="input">Text to process.</param>
/// <returns>The text with all matches substituted.</returns>
public static string Sanitize(string input)
{
    MatchEvaluator lookUpReplacement = hit => repl[hit.Value[0]];
    return replaceRegex.Replace(input, lookUpReplacement);
}
/// <summary>
/// Copies <paramref name="input"/> into a StringBuilder and applies the nine fixed
/// character-for-character substitutions in place.
/// </summary>
/// <param name="input">Text to process.</param>
/// <returns>A new string with the substitutions applied.</returns>
static string DoGeneralReplace(string input)
{
    StringBuilder buffer = new StringBuilder(input);
    buffer.Replace('^', 'Č');
    buffer.Replace('@', 'Ž');
    buffer.Replace('[', 'Š');
    buffer.Replace(']', 'Ć');
    buffer.Replace('`', 'ž');
    buffer.Replace('}', 'ć');
    buffer.Replace('~', 'č');
    buffer.Replace('{', 'š');
    buffer.Replace('\\', 'Đ');
    return buffer.ToString();
}
/// <summary>
/// Applies a character-to-character mapping to a string, one mapped character at a time,
/// in the order the map enumerates its keys.
/// </summary>
/// <param name="input">Text to process.</param>
/// <param name="replacementMap">Maps each character to replace onto its substitute.</param>
/// <returns>The text after every mapping has been applied in turn.</returns>
static string Replace(string input, IDictionary<char, char> replacementMap)
{
    string current = input;
    foreach (char oldChar in replacementMap.Keys)
    {
        // Each step works on the output of the previous one.
        current = current.Replace(oldChar, replacementMap[oldChar]);
    }
    return current;
}
/// <summary>
/// Replaces every character listed in <c>badChars</c> with the character at the same
/// ordinal in <c>replacementChars</c>, locating matches with <c>IndexOfAny</c> and copying
/// the untouched runs between them in bulk.
/// </summary>
/// <param name="sIn">Text to process.</param>
/// <returns>
/// <paramref name="sIn"/> itself when it contains no character from <c>badChars</c>;
/// otherwise a new string with the substitutions applied.
/// </returns>
static string ReplaceCharsWithIndexOfAny(string sIn)
{
    int replChar = sIn.IndexOfAny(badChars);
    if (replChar < 0)
        return sIn;

    // Don't even bother making a copy unless you know you have something to swap.
    // The builder starts out holding everything before the first match.
    StringBuilder sb = new StringBuilder(sIn, 0, replChar, sIn.Length + 10);
    while (replChar >= 0)
    {
        // replChar is a position in sIn, not an index into the replacement table, so the
        // matched character is first translated to its ordinal in badChars. IndexOfAny
        // only reports characters from badChars, so the lookup always succeeds.
        // To swap a char for a string, or to drop it entirely, this is the one place to
        // change: append a string (or nothing) instead of a single char.
        int tableIndex = Array.IndexOf(badChars, sIn[replChar]);
        sb.Append(replacementChars[tableIndex]);

        replChar++; // skip over what we just replaced
        if (replChar >= sIn.Length)
            break;

        // Copy the unchanged run up to the next match, or to the end when there is none.
        int nextRepChar = sIn.IndexOfAny(badChars, replChar);
        int runEnd = nextRepChar >= 0 ? nextRepChar : sIn.Length;
        sb.Append(sIn, replChar, runEnd - replChar);
        replChar = nextRepChar;
    }
    return sb.ToString();
}
/// <summary>
/// Entry point: runs the benchmark four times, numbering the passes 1 through 4.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    const int passCount = 4;
    int pass = 1;
    while (pass <= passCount)
    {
        DoIt(pass);
        pass++;
    }
}
/// <summary>
/// Runs one benchmark pass. Each replacement strategy processes the four test strings
/// 500,000 times; the elapsed milliseconds per strategy are written to the console.
/// The replaced strings themselves are discarded, so only timing is reported.
/// </summary>
/// <param name="n">Pass number; only used in the "*** Pass" header line.</param>
static void DoIt(int n)
{
    const int Iterations = 500000;
    Stopwatch sw = new Stopwatch();
    Console.WriteLine("*** Pass " + n.ToString());

    // old way
    sw.Start();
    for (int idx = 0; idx < Iterations; idx++)
    {
        string result1 = test1.Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š').Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć').Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ');
        string result2 = test2.Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š').Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć').Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ');
        string result3 = test3.Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š').Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć').Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ');
        string result4 = test4.Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š').Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć').Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ');
    }
    sw.Stop();
    Console.WriteLine("Old (Chained String.Replace()) way completed in " + sw.ElapsedMilliseconds.ToString() + " ms");

    // Lookup table shared by the logicnp and Aggregate strategies; built while the timer is stopped.
    Dictionary<char, char> replacements = new Dictionary<char, char>();
    replacements.Add('^', 'Č');
    replacements.Add('@', 'Ž');
    replacements.Add('[', 'Š');
    replacements.Add(']', 'Ć');
    replacements.Add('`', 'ž');
    replacements.Add('}', 'ć');
    replacements.Add('~', 'č');
    replacements.Add('{', 'š');
    replacements.Add('\\', 'Đ');

    // logicnp way: copy to a char array and overwrite mapped characters in place
    sw.Reset();
    sw.Start();
    for (int idx = 0; idx < Iterations; idx++)
    {
        char[] charArray1 = test1.ToCharArray();
        for (int i = 0; i < charArray1.Length; i++)
        {
            char newChar;
            if (replacements.TryGetValue(test1[i], out newChar))
                charArray1[i] = newChar;
        }
        string result1 = new string(charArray1);
        char[] charArray2 = test2.ToCharArray();
        for (int i = 0; i < charArray2.Length; i++)
        {
            char newChar;
            if (replacements.TryGetValue(test2[i], out newChar))
                charArray2[i] = newChar;
        }
        string result2 = new string(charArray2);
        char[] charArray3 = test3.ToCharArray();
        for (int i = 0; i < charArray3.Length; i++)
        {
            char newChar;
            if (replacements.TryGetValue(test3[i], out newChar))
                charArray3[i] = newChar;
        }
        string result3 = new string(charArray3);
        char[] charArray4 = test4.ToCharArray();
        for (int i = 0; i < charArray4.Length; i++)
        {
            char newChar;
            if (replacements.TryGetValue(test4[i], out newChar))
                charArray4[i] = newChar;
        }
        string result4 = new string(charArray4);
    }
    sw.Stop();
    Console.WriteLine("logicnp (ToCharArray) way completed in " + sw.ElapsedMilliseconds.ToString() + " ms");

    // oleksii way
    sw.Reset();
    sw.Start();
    for (int idx = 0; idx < Iterations; idx++)
    {
        string result1 = DoGeneralReplace(test1);
        string result2 = DoGeneralReplace(test2);
        string result3 = DoGeneralReplace(test3);
        string result4 = DoGeneralReplace(test4);
    }
    sw.Stop();
    Console.WriteLine("oleksii (StringBuilder) way completed in " + sw.ElapsedMilliseconds.ToString() + " ms");

    // André Christoffer Andersen way
    sw.Reset();
    sw.Start();
    for (int idx = 0; idx < Iterations; idx++)
    {
        string result1 = Replace(test1, replacements);
        string result2 = Replace(test2, replacements);
        string result3 = Replace(test3, replacements);
        string result4 = Replace(test4, replacements);
    }
    sw.Stop();
    Console.WriteLine("André Christoffer Andersen (Lambda w/ Aggregate) way completed in " + sw.ElapsedMilliseconds.ToString() + " ms");

    // Richard way
    // The regex and evaluator are one-time setup, so they are created before the timer
    // starts; the other strategies' setup is likewise excluded from their timings.
    Regex reg = new Regex(@"\^|@|\[|\]|`|\}|~|\{|\\");
    MatchEvaluator eval = match =>
    {
        switch (match.Value)
        {
            case "^": return "Č";
            case "@": return "Ž";
            case "[": return "Š";
            case "]": return "Ć";
            case "`": return "ž";
            case "}": return "ć";
            case "~": return "č";
            case "{": return "š";
            case "\\": return "Đ";
            default: throw new Exception("Unexpected match!");
        }
    };
    sw.Reset();
    sw.Start();
    for (int idx = 0; idx < Iterations; idx++)
    {
        string result1 = reg.Replace(test1, eval);
        string result2 = reg.Replace(test2, eval);
        string result3 = reg.Replace(test3, eval);
        string result4 = reg.Replace(test4, eval);
    }
    sw.Stop();
    Console.WriteLine("Richard (Regex w/ MatchEvaluator) way completed in " + sw.ElapsedMilliseconds.ToString() + " ms");

    // Marc Gravell way
    sw.Reset();
    sw.Start();
    for (int idx = 0; idx < Iterations; idx++)
    {
        string result1 = Sanitize(test1);
        string result2 = Sanitize(test2);
        string result3 = Sanitize(test3);
        string result4 = Sanitize(test4);
    }
    sw.Stop();
    Console.WriteLine("Marc Gravell (Static Regex) way completed in " + sw.ElapsedMilliseconds.ToString() + " ms");

    // user1664043 way
    sw.Reset();
    sw.Start();
    for (int idx = 0; idx < Iterations; idx++)
    {
        string result1 = ReplaceCharsWithIndexOfAny(test1);
        string result2 = ReplaceCharsWithIndexOfAny(test2);
        string result3 = ReplaceCharsWithIndexOfAny(test3);
        string result4 = ReplaceCharsWithIndexOfAny(test4);
    }
    sw.Stop();
    Console.WriteLine("user1664043 (StringBuilder w/ IndexOfAny) way completed in " + sw.ElapsedMilliseconds.ToString() + " ms\n");
}
}
}