我有一种方法可以提取超过 XX 个单词的文本块。问题是它不会返回该文本内的链接。
我的方法:
public string getAllTextHTML(string _html)
{
string _allText = "";
try
{
HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
document.LoadHtml(_html);
document.DocumentNode.Descendants()
.Where(n => n.Name == "script" || n.Name == "style")
.ToList()
.ForEach(n => n.Remove());
RemoveComments(document.DocumentNode);
var root = document.DocumentNode;
var sb = new StringBuilder();
foreach (var node in root.DescendantNodesAndSelf())
{
if (!node.HasChildNodes)
{
string text = node.InnerHtml;
if (!string.IsNullOrEmpty(text))
{
int antalOrd = WordCounting.CountWords1(text);
if (antalOrd > 25)
{
text = System.Web.HttpUtility.HtmlDecode(text);
sb.AppendLine(text.Trim());
}
}
}
}
_allText = sb.ToString();
}
catch (Exception)
{
}
_allText = System.Web.HttpUtility.HtmlDecode(_allText);
return _allText;
}
我怎样才能让这也让我获得文本中的链接?