0

有时我们发现,从某些网站获取的 HTML 源代码缺少正确的结束标签,这会影响我们的 UI。例如:

<br /><p>hello the para start here </p> <p>some text and no ending tag

其中第二个 <p> 没有结束标签。

我想保留 HTML 格式,并希望得到如下结果:

<br /><p>hello the para start here </p> some text and no ending tag

另外,有时字符串的开头会出现没有对应开始标签的结束标签,算法也应当处理这种情况。

4

1 回答 1

0

大家好,我思考了很长时间,终于写出了解决这个问题的代码。现将其发布在这里,以便其他人也能从中受益。

 /// <summary>
 /// Removes unbalanced occurrences of a single HTML tag from <paramref name="source"/>:
 /// opening tags that are never closed and closing tags that have no preceding opening tag.
 /// Balanced pairs of the tag and all other content are kept.
 /// </summary>
 /// <remarks>
 /// The text is split into space-separated tokens after every '&lt;' and '&gt;' has been padded
 /// with a space, so the result is whitespace-normalized: tokens are joined by single spaces and
 /// tags end up separated from the adjacent text. Only the bare forms <c>&lt;tag&gt;</c> and
 /// <c>&lt;/tag&gt;</c> are recognized (ignoring case); an opening tag that carries attributes is
 /// not recognized, so its closing tag is treated as an orphan and dropped.
 /// </remarks>
 /// <param name="source">The HTML fragment to clean. Null or empty yields an empty string.</param>
 /// <param name="tag">The tag name without angle brackets, for example <c>p</c>.</param>
 /// <returns>The cleaned fragment, or an empty string when nothing remains.</returns>
 /// <exception cref="System.ArgumentNullException"><paramref name="tag"/> is null.</exception>
 public static string RemoveIncompleteTags(string source, string tag)
    {
        if (tag == null)
        {
            throw new System.ArgumentNullException("tag");
        }

        if (string.IsNullOrEmpty(source))
        {
            return string.Empty;
        }

        string openTag = "<" + tag + ">";
        string closeTag = "</" + tag + ">";

        // Line breaks and tabs only separate tokens. They are turned into spaces (not deleted) so
        // that words on adjacent lines are not glued together. The previous code searched for the
        // literal texts "/n", "/r" and "/t", which left real line breaks in place and damaged
        // ordinary content such as "/news".
        source = source.Replace('\r', ' ').Replace('\n', ' ').Replace('\t', ' ');
        source = source.Replace("  ", " ");

        // Drop pairs of the tag that contain nothing but up to two spaces.
        source = source.Replace(openTag + closeTag, string.Empty);
        source = source.Replace(openTag + " " + closeTag, string.Empty);
        source = source.Replace(openTag + "  " + closeTag, string.Empty);

        // Removing a pair can leave a double space behind; collapse it before padding so that the
        // " >" / "< " clean-up below sees the same text as before.
        source = source.Replace("  ", " ");

        // Isolate every tag as its own token, then undo the padding inside the brackets.
        source = source.Replace(">", "> ").Replace("<", " <");
        source = source.Replace(" >", ">").Replace("< ", "<");

        string[] words = source.Split(new[] { ' ' }, System.StringSplitOptions.RemoveEmptyEntries);

        // Tokens kept so far, in order. Slots of unclosed opening tags are nulled out at the end.
        List<string> kept = new List<string>(words.Length);

        // Positions in 'kept' of opening tags still waiting for a closing tag (innermost on top).
        Stack<int> unclosed = new Stack<int>();

        foreach (string word in words)
        {
            if (string.Equals(word, openTag, System.StringComparison.OrdinalIgnoreCase))
            {
                unclosed.Push(kept.Count);
                kept.Add(word);
            }
            else if (string.Equals(word, closeTag, System.StringComparison.OrdinalIgnoreCase))
            {
                // A closing tag is kept only when it closes a pending opening tag;
                // an orphan closing tag is left out of the output.
                if (unclosed.Count > 0)
                {
                    unclosed.Pop();
                    kept.Add(word);
                }
            }
            else
            {
                kept.Add(word);
            }
        }

        // Whatever is still on the stack was never closed: remove those opening tags.
        foreach (int index in unclosed)
        {
            kept[index] = null;
        }

        StringBuilder result = new StringBuilder();
        foreach (string word in kept)
        {
            if (word == null)
            {
                continue;
            }

            string trimmed = word.Trim();
            if (trimmed.Length == 0)
            {
                continue;
            }

            if (result.Length > 0)
            {
                result.Append(' ');
            }

            result.Append(trimmed);
        }

        return result.ToString();
    }

谢谢...

于 2012-07-12T06:17:02.950 回答