在 C# 中,我有两个函数用于遍历 HtmlDocument 中的 HtmlElement。我遇到的问题是:当遍历到一个同时包含文本和子元素的元素时,该元素自身的文本部分会被忽略。例如,遇到这样的节点时:<div> hello1 <div> hello2</div> hello3</div>,
我的函数只会取到 hello2,而 hello1 和 hello3 会被忽略。我希望保留现有的解决方案,而不必将 HtmlElement 转换为 XElement。有没有办法获取 hello1 和 hello3?
/// <summary>
/// Flattens the element tree under the document's first <c>html</c> element into a
/// single XML element named <paramref name="rootname"/>.
/// </summary>
/// <param name="htmlDoc">Document whose <c>html</c> element is walked.</param>
/// <param name="rootname">Name of the returned container element.</param>
/// <param name="rowname">Name given to each row element added to the container.</param>
/// <returns>
/// A new element named <paramref name="rootname"/> holding the rows collected by
/// <c>FlattenChildrenIter</c>, in document order. The element is empty when the
/// document has no <c>html</c> element or no element qualifies.
/// </returns>
private static XElement FlattenChildren(this HtmlDocument htmlDoc, string rootname, string rowname)
{
    XElement result = new XElement(rootname);

    // Guard against documents without an <html> element instead of letting the
    // [0] indexer throw.
    HtmlElementCollection htmlRoots = htmlDoc.GetElementsByTagName("html");
    if (htmlRoots.Count == 0)
    {
        return result;
    }

    // The per-child handling is the same as one level of the recursive walk, so
    // start the recursion at the root rather than duplicating the loop here.
    List<XElement> resultList = new List<XElement>();
    FlattenChildrenIter(htmlRoots[0], rowname, ref resultList);

    // XElement.Add accepts a sequence and appends each item as a child.
    result.Add(resultList);
    return result;
}
/// <summary>
/// Recursively walks the children of <paramref name="p"/> and appends one row
/// element to <paramref name="resultList"/> for every qualifying leaf element.
/// </summary>
/// <remarks>
/// A leaf (an element without child elements) qualifies when <c>IgnoreTag()</c> is
/// false, <c>HasQualifiedValue()</c> is true, and both its <c>gr</c> and <c>gs</c>
/// attributes are non-empty. Elements that have child elements are only descended
/// into, so text placed directly inside such a mixed-content element does not
/// produce a row.
/// </remarks>
/// <param name="p">Element whose children are visited.</param>
/// <param name="rowname">Name given to each created row element.</param>
/// <param name="resultList">List that receives the created rows in document order.</param>
private static void FlattenChildrenIter(HtmlElement p, string rowname, ref List<XElement> resultList)
{
    foreach (HtmlElement elem in p.Children)
    {
        // Non-leaf: nothing is emitted for the element itself, only for its descendants.
        if (elem.Children.Count > 0)
        {
            FlattenChildrenIter(elem, rowname, ref resultList);
            continue;
        }

        if (elem.IgnoreTag() || !elem.HasQualifiedValue())
        {
            continue;
        }

        string gr = elem.GetAttribute("gr");
        string gs = elem.GetAttribute("gs");
        if (string.IsNullOrEmpty(gr) || string.IsNullOrEmpty(gs))
        {
            continue;
        }

        // Carry the two attributes and the leaf's text over to the row.
        XElement newelem = new XElement(rowname);
        newelem.SetAttributeValue("gr", gr);
        newelem.SetAttributeValue("gs", gs);
        newelem.Value = elem.InnerText;
        resultList.Add(newelem);
    }
}
/// <summary>
/// Counts the text nodes that are direct children of the element, by parsing the
/// element's <c>OuterHtml</c> as XML.
/// </summary>
/// <param name="e">Element whose markup is inspected.</param>
/// <returns>
/// The number of direct child nodes of type <see cref="XmlNodeType.Text"/>, or 0
/// when the markup cannot be processed.
/// </returns>
private static int CountXText(this HtmlElement e)
{
    try
    {
        // NOTE(review): OuterHtml is HTML markup, which is presumably not always
        // well-formed XML; that would explain why parsing fails for some elements.
        XElement parsed = XElement.Parse(e.OuterHtml);
        return parsed.Nodes()
                     .Where(node => node.NodeType == XmlNodeType.Text)
                     .Count();
    }
    catch
    {
        // Best effort: any failure is reported as "no text nodes".
        return 0;
    }
}