基于 @Unknwntech 在已关闭的 "bad words" filter 帖子中给出的答案,下面是一个与之等效的 C# 实现:
/// <summary>
/// Censors every stand-alone occurrence of the given bad words (including the
/// misspellings produced by ExpandBadWordToIncludeIntentionalMisspellings) by
/// replacing it with asterisks.
/// </summary>
/// <param name="data">Text to censor. Null or empty text is returned unchanged.</param>
/// <param name="badWords">Words to censor. Null or empty entries are skipped.</param>
/// <param name="badWordCount">Receives the number of occurrences that were censored.</param>
/// <returns>The text with each matched occurrence replaced by as many '*' as the bad word has characters.</returns>
public string ReplaceBadWords(string data, string[] badWords, out int badWordCount)
{
    badWordCount = 0;
    if (string.IsNullOrEmpty(data) || badWords == null)
    {
        return data;
    }

    // An out parameter cannot be captured by a lambda, so count in a local.
    int count = 0;
    string op = data;
    foreach (var word in badWords)
    {
        if (string.IsNullOrEmpty(word))
        {
            continue;
        }

        string mask = new string('*', word.Length);
        string expword = ExpandBadWordToIncludeIntentionalMisspellings(word);

        // Zero-width lookarounds are used instead of capturing the surrounding
        // whitespace: the word is then also found at the very start or end of the
        // text, before a single '!', '?' or '.', and two adjacent bad words no
        // longer compete for the whitespace between them.
        var r = new Regex(@"(?<=^|\s)(?:" + expword + @")(?=\s|[!?.]|$)");

        // Replace at the matched positions only (not every textual occurrence of
        // the matched string), counting each censored occurrence exactly once.
        op = r.Replace(op, match =>
        {
            count++;
            return mask;
        });
    }

    badWordCount = count;
    return op;
}
/// <summary>
/// Builds a regular-expression fragment that matches the given word as well as
/// common intentional misspellings of it (mixed case and look-alike characters
/// such as '@' for 'a' or "ph" for 'f').
/// </summary>
/// <param name="word">The bad word to expand. Letters are treated case-insensitively.</param>
/// <returns>
/// A pattern fragment with one element per character of the word. Every element is a
/// character class, a non-capturing group or an escaped literal, so the fragment can be
/// concatenated into a larger pattern.
/// </returns>
/// <exception cref="System.ArgumentNullException">The word is null.</exception>
/// <exception cref="System.ArgumentException">The word is empty.</exception>
public string ExpandBadWordToIncludeIntentionalMisspellings(string word)
{
    if (word == null)
    {
        throw new System.ArgumentNullException("word");
    }
    if (word.Length == 0)
    {
        throw new System.ArgumentException("The bad word must not be empty.", "word");
    }

    var pattern = new System.Text.StringBuilder();
    foreach (char ch in word)
    {
        // Character classes contain no separator spaces: inside [...] a space is a
        // literal and would make every letter match a blank. Multi-character
        // substitutions are written as alternations because a class only ever
        // matches a single character.
        switch (char.ToLowerInvariant(ch))
        {
            case 'a': pattern.Append("[aA@]"); break;
            case 'b': pattern.Append("(?:[bB]|[Ili]3)"); break;
            case 'c': pattern.Append("[cCkK(]"); break;
            case 'd': pattern.Append("[dD]"); break;
            case 'e': pattern.Append("[eE3]"); break;
            case 'f': pattern.Append("(?:[fF]|[pP][hH])"); break;
            case 'g': pattern.Append("[gG6]"); break;
            case 'h': pattern.Append("[hH]"); break;
            case 'i': pattern.Append("[iIl!1]"); break;
            case 'j': pattern.Append("[jJ]"); break;
            case 'k': pattern.Append("[cCkK(]"); break;
            case 'l': pattern.Append("[lL1!i]"); break;
            case 'm': pattern.Append("[mM]"); break;
            case 'n': pattern.Append("[nN]"); break;
            case 'o': pattern.Append("[oO0]"); break;
            case 'p': pattern.Append("[pP]"); break;
            case 'q': pattern.Append("[qQ9]"); break;
            case 'r': pattern.Append("[rR]"); break;
            case 's': pattern.Append("[sS$5]"); break;
            case 't': pattern.Append("[tT7]"); break;
            case 'u': pattern.Append("[uUvV]"); break;
            case 'v': pattern.Append("[vVuU]"); break;
            case 'w': pattern.Append("(?:[wW]|vv|VV)"); break;
            case 'x': pattern.Append("[xX]"); break;
            case 'y': pattern.Append("[yY]"); break;
            case 'z': pattern.Append("[zZ2]"); break;
            default:
                // Any other character must match itself literally; escaping keeps
                // regex metacharacters in the word from corrupting the pattern.
                pattern.Append(Regex.Escape(ch.ToString()));
                break;
        }
    }

    return pattern.ToString();
}
只要你有一份好的坏词列表,这段代码在防止这类错误(没错,去谷歌搜一下)方面做得相当不错。