0

在 C# 中，我有两个函数用来遍历 HtmlDocument 中的 HtmlElement。我遇到的问题是：当遍历到一个同时包含文本和子元素的元素时，该元素自身的文本部分会被忽略。例如，遇到这样的节点：<div> hello1 <div> hello2</div> hello3</div>。

我的函数只会取到 hello2，而 hello1 和 hello3 会被忽略。我希望保留现有的方案，而不必把 HtmlElement 转换成 XElement。有没有办法取到 hello1 和 hello3？

/// <summary>
/// Flattens the element tree below the document's first &lt;html&gt; element into a single
/// <see cref="XElement"/> named <paramref name="rootname"/>.
/// </summary>
/// <remarks>
/// The traversal itself is done by <c>FlattenChildrenIter</c>, which adds one
/// <paramref name="rowname"/> element per qualifying leaf element. Rows are appended to the
/// result in the order the traversal produced them.
/// </remarks>
/// <param name="htmlDoc">Document whose first &lt;html&gt; element is used as the traversal root.</param>
/// <param name="rootname">Name of the returned container element.</param>
/// <param name="rowname">Name given to every row element added to the container.</param>
/// <returns>A new element named <paramref name="rootname"/> containing the collected rows.</returns>
private static XElement FlattenChildren(this HtmlDocument htmlDoc, string rootname, string rowname)
    {
        XElement result = new XElement(rootname);
        List<XElement> resultList = new List<XElement>();
        HtmlElement htmlRoot = htmlDoc.GetElementsByTagName("html")[0];

        // Walking the root's children is exactly what the recursive helper does for any
        // parent, so delegate instead of keeping a second copy of the leaf-handling logic.
        FlattenChildrenIter(htmlRoot, rowname, ref resultList);

        // XContainer.Add enumerates a collection argument and appends each item in order.
        result.Add(resultList);
        return result;
    }

/// <summary>
/// Recursively walks the child elements of <paramref name="p"/> and appends one
/// <paramref name="rowname"/> element to <paramref name="resultList"/> for every
/// qualifying leaf element.
/// </summary>
/// <remarks>
/// A leaf (an element whose <c>Children</c> collection is empty) qualifies when
/// <c>IgnoreTag()</c> returns false, <c>HasQualifiedValue()</c> returns true, and both its
/// "gr" and "gs" attributes are non-empty. The row copies those two attributes and uses the
/// leaf's <c>InnerText</c> as its value.
/// Known limitation: only leaves contribute text, so text placed directly inside an element
/// that also has child elements (e.g. "hello1" in &lt;div&gt; hello1 &lt;div&gt;hello2&lt;/div&gt;&lt;/div&gt;)
/// is not captured.
/// </remarks>
/// <param name="p">Parent element whose children are visited.</param>
/// <param name="rowname">Name given to every row element that is created.</param>
/// <param name="resultList">List that receives the created rows, in traversal order.</param>
private static void FlattenChildrenIter(HtmlElement p, string rowname, ref List<XElement> resultList)
    {
        foreach (HtmlElement elem in p.Children)
        {
            // Non-leaf: nothing is emitted for the element itself, only for its descendants.
            if (elem.Children.Count != 0)
            {
                FlattenChildrenIter(elem, rowname, ref resultList);
                continue;
            }

            if (elem.IgnoreTag() || !elem.HasQualifiedValue())
            {
                continue;
            }

            string gr = elem.GetAttribute("gr");
            string gs = elem.GetAttribute("gs");
            if (string.IsNullOrEmpty(gr) || string.IsNullOrEmpty(gs))
            {
                // Both attributes are required for a row.
                continue;
            }

            XElement row = new XElement(rowname);
            row.SetAttributeValue("gr", gr);
            row.SetAttributeValue("gs", gs);
            row.Value = elem.InnerText;
            resultList.Add(row);
        }
    }
    /// <summary>
    /// Counts the text nodes that are direct children of the element, determined by parsing
    /// the element's <c>OuterHtml</c> as XML.
    /// </summary>
    /// <param name="e">Element to inspect.</param>
    /// <returns>
    /// The number of direct child nodes whose type is <see cref="XmlNodeType.Text"/>, or 0 when
    /// the element is null, has no markup, or its markup is not well-formed XML.
    /// </returns>
    private static int CountXText(this HtmlElement e)
    {
        if (e == null)
        {
            return 0;
        }

        string markup = e.OuterHtml;
        if (string.IsNullOrEmpty(markup))
        {
            return 0;
        }

        try
        {
            // HTML is often not well-formed XML, so this parse is expected to fail for some
            // elements; that case is reported as "no text nodes" rather than as an error.
            XElement parsed = XElement.Parse(markup);
            return parsed.Nodes().Count(node => node.NodeType == XmlNodeType.Text);
        }
        catch (XmlException)
        {
            return 0;
        }
    }
4

0 回答 0