您可以只用一些简单的字符串操作来实现,无需引入额外的命名空间或工具。
请看下面这个例子,也许它可以解决您的问题:
// Sample input: <p> tags (with and without attributes) and </br> tags in mixed case.
string html = string.Concat("<p class=\"foo\">",
    "<p class=\"bar\">",
    "<p>",
    "</br>",
    "<P>",
    "</BR>"); // tags can be upper case as well

// strAux is the not-yet-scanned remainder of the ORIGINAL markup;
// html is the working copy that receives the replacements.
string strAux = html;
int tagOpenedAt = -1, tagClosedAt = -1;

// Becomes true only when the remaining text is malformed: a '<' or '>' without
// its partner, or a '>' that appears before the next '<'.
bool isError = false;

while (true)
{
    tagOpenedAt = strAux.IndexOf('<');
    tagClosedAt = strAux.IndexOf('>');

    // No delimiters left: the input was scanned to the end without problems.
    if (tagOpenedAt < 0 && tagClosedAt < 0)
    {
        break;
    }

    // tagOpenedAt must be tested explicitly: -1 is smaller than any valid
    // tagClosedAt, and Substring(-1, ...) would throw on a stray '>'.
    if (tagOpenedAt < 0 || tagClosedAt < tagOpenedAt)
    {
        isError = true;
        break;
    }

    string fullTag = strAux.Substring(tagOpenedAt, tagClosedAt - tagOpenedAt + 1);

    // Ordinal, case-insensitive comparison: tag names are not linguistic text,
    // so the result must not depend on the current culture.
    if (fullTag.Equals("<p>", System.StringComparison.OrdinalIgnoreCase)
        || fullTag.StartsWith("<p ", System.StringComparison.OrdinalIgnoreCase))
    {
        // <p> --> <Paragraph>
        html = html.Replace(fullTag, "<Paragraph>");
    }
    else if (fullTag.Equals("</br>", System.StringComparison.OrdinalIgnoreCase))
    {
        // </br> --> <LineBreak/>
        html = html.Replace(fullTag, "<LineBreak/>");
    }
    // more conditions as you need them

    // Continue scanning right after the tag that was just handled.
    strAux = strAux.Substring(tagClosedAt + 1);
}
抱歉代码写得不够好。也许您可以做一点改进:只调用一次 .ToLower(),而不是在每个 if 语句中都调用。另外,我没有检查错误的标签,代码只是假设 html 是有效的。
我刚刚对代码做了一点修改,修改后的版本如下:
// Sample input: one tag per line, mixing <p> (with and without attributes)
// and </br>, in both lower and upper case.
string html = string.Concat("<p class=\"foo\">", "\n",
    "<p class=\"bar\">", "\n",
    "<p>", "\n",
    "</br>", "\n",
    "<P>", "\n",
    "</BR>");
Console.WriteLine("HTML is :\n{0}\n", html);

// strAux is the not-yet-scanned remainder of the ORIGINAL markup;
// html is the working copy that receives the replacements.
string strAux = html;
int tagOpenedAt = -1, tagClosedAt = -1;

// Becomes true only when the remaining text is malformed: a '<' or '>' without
// its partner, or a '>' that appears before the next '<'.
bool isError = false;

while (true)
{
    tagOpenedAt = strAux.IndexOf('<');
    tagClosedAt = strAux.IndexOf('>');

    // No delimiters left: the input was scanned to the end without problems.
    if (tagOpenedAt < 0 && tagClosedAt < 0)
    {
        break;
    }

    // tagOpenedAt must be tested explicitly: -1 is smaller than any valid
    // tagClosedAt, and Substring(-1, ...) would throw on a stray '>'.
    if (tagOpenedAt < 0 || tagClosedAt < tagOpenedAt)
    {
        isError = true;
        break;
    }

    string fullTag = strAux.Substring(tagOpenedAt, tagClosedAt - tagOpenedAt + 1);
    string replacement = null;

    // Ordinal, case-insensitive comparison: tag names are not linguistic text,
    // so the result must not depend on the current culture.
    if (fullTag.Equals("<p>", StringComparison.OrdinalIgnoreCase)
        || fullTag.StartsWith("<p ", StringComparison.OrdinalIgnoreCase))
    {
        // <p> --> <Paragraph>
        replacement = "<Paragraph>";
    }
    else if (fullTag.Equals("</br>", StringComparison.OrdinalIgnoreCase))
    {
        // </br> --> <LineBreak/>
        replacement = "<LineBreak/>";
    }
    // more conditions as you need them

    if (replacement != null)
    {
        html = html.Replace(fullTag, replacement);
        Console.WriteLine("Replaced {0} with {1}", fullTag, replacement);
    }

    // Continue scanning right after the tag that was just handled.
    strAux = strAux.Substring(tagClosedAt + 1);
}

Console.WriteLine("\nNew html is :\n{0}", html);