至少对我而言,凡是涉及正则表达式的东西都带有一定的不确定性,不过下面的代码对您给出的三个输入是有效的:
// Sample URLs whose query-string values contain (percent-encoded) domain names.
string[] urls = new string[]
{
    "http://open.thumbshots.org/image.pxf?url=www.party.com",
    "http://www.xclicks.net/sc/ct.php?s=9971&l=http%3A//www.google.com/imgres%3F",
    "http://whos.amung.us/pingjs/?k=yvybju40twbs&t=Mudswimmer%3A%20Spam%20%26%20Crap%3A%20Http%3AUniversity.com%3A%20No%20Animals%20Allowed..&c=c&y=htt"
};

// Matches a host name ending in one of the listed TLDs and captures it in the
// "domain" group; a leading "www." is consumed outside the group so it is not
// part of the captured value.
// The trailing (?![-a-z0-9]) requires the TLD to end at a label boundary.
// Without it the two-letter alternatives win over longer TLDs that share their
// prefix ("example.net" was captured as "example.ne") and a TLD could match as
// a mere prefix of a longer label ("party.community" was captured as "party.com").
const string DomainNamePattern = "(?:www\\.|\\b)(?<domain>([a-z0-9]([-a-z0-9]*[a-z0-9])?\\.)+((a[cdefgilmnoqrstuwxz]|aero|arpa)|(b[abdefghijmnorstvwyz]|biz)|(cat|com|coop|c[acdfghiklmnorsuvxyz])|d[ejkmoz]|(e[ceghrstu]|edu)|f[ijkmor]|(g[abdefghilmnpqrstuwy]|gov)|h[kmnrtu]|(i[delmnoqrst]|info|int)|(j[emop]|jobs)|k[eghimnprwyz]|l[abcikrstuvy]|(m[acdghklmnopqrstuvwxyz]|mil|mobi|museum)|(n[acefgilopruz]|name|net)|(om|org)|(p[aefghklmnrstwy]|pro)|qa|r[eouw]|s[abcdeghijklmnortvyz]|(t[cdfghjklmnoprtvwz]|travel)|u[agkmsyz]|v[aceginu]|w[fs]|y[etu]|z[amw]))(?![-a-z0-9])";

// Built once and reused for every query-string value instead of being set up
// again inside the innermost loop.
var domainRegex = new Regex(DomainNamePattern, RegexOptions.IgnoreCase);

foreach (var url in urls)
{
    // ParseQueryString returns the decoded key/value pairs of the query part.
    var query = HttpUtility.ParseQueryString(new Uri(url, UriKind.Absolute).Query);
    foreach (string key in query)
    {
        // A key may appear more than once (e.g. "?a=1&a=2"), so inspect every
        // value; assuming exactly one value per key would throw on such input.
        string[] values = query.GetValues(key);
        if (values == null)
        {
            continue;
        }

        foreach (string value in values)
        {
            var match = domainRegex.Match(value);
            if (match.Success)
            {
                // Print only the captured host name, without any "www." prefix.
                Console.WriteLine(match.Groups["domain"].Value);
            }
        }
    }
}
使用的正则表达式改编自这里。
如果你运行它,你会得到以下输出:
// party.com
// google.com
// University.com