1

我正在使用 HtmlAgilityPack 来解析 XML 文档。我先用它将字符串加载为 HtmlDocument,然后使用 XmlTextReader 进行解析。我偶尔会在 HtmlAgilityPack.dll 中遇到未处理的 StackOverflowException 异常。出错的具体代码行是:

// Attribute lookup table keyed by attribute name (quoted from the HtmlAgilityPack source).
// The declared type must match the constructed type: HtmlAttribute, not Htmlattribute.
internal Dictionary<string, HtmlAttribute> Hashitems =
    new Dictionary<string, HtmlAttribute>();

编辑(补充我的代码):

Try
    ' Extracts feed-entry fields from xmlsnippet in two forward-only passes.
    '
    ' NOTE: the original code also loaded xmlsnippet into an
    ' HtmlAgilityPack.HtmlDocument whose result was never read. That call was
    ' the only use of HtmlAgilityPack here, and a StackOverflowException raised
    ' inside it cannot be handled by the Catch below (the runtime terminates the
    ' process), so the unused parse has been removed.
    '
    ' The If chains below are deliberately sequential (not ElseIf): ReadInnerXml
    ' advances the reader, so a later check can match the node the reader has
    ' moved on to.

    ' Pass 1: id / published / summary / title, limited to the first 17 named nodes.
    Using nreader As New XmlTextReader(New StringReader(xmlsnippet))
        Dim ncount As Integer = 0

        While nreader.Read
            If Not nreader.Name = "" Then
                ncount += 1
                If ncount = 18 Then
                    Exit While
                End If
                num += 1
                nodelist.Add(nreader.Name)
                If nreader.Name = "id" Then
                    statid = nreader.ReadInnerXml
                End If
                If nreader.Name = "published" Then
                    ' Turn the ISO-style timestamp into "date time" text.
                    contentDate = nreader.ReadInnerXml
                    contentDate = Regex.Replace(contentDate, "T", " ")
                    contentDate = Regex.Replace(contentDate, "\+", " ")
                    contentDate = contentDate.Replace("Z", "")
                End If
                If nreader.Name = "summary" Then
                    ctext = nreader.ReadInnerXml
                End If
                If nreader.Name = "title" Then
                    csubject = nreader.ReadInnerXml
                    If csubject.Contains("posted") Then
                        template = csubject
                        author = Regex.Replace(template, "posted.*", "")
                    End If
                    If csubject.Contains("Keyword -") Then
                        searchterm = Regex.Replace(csubject, "xxxxxx.*xxxxxx.*xxxx.*-", "")
                        searchterm = Regex.Replace(searchterm, "xxxxx.*xxxxxx.*Search.*-", "")
                        ' Trim returns a new string; the result must be assigned back.
                        searchterm = Trim(searchterm)
                    End If
                End If
            End If
        End While
    End Using

    ' Both <link> branches use the same pattern, so build it once.
    Dim linkPattern As New Regex("<link.*rel.*href=""(?<Link>.*?)"".*/>", RegexOptions.IgnoreCase)

    ' Pass 2: author / content / link details, ignoring the first 15 named nodes.
    Using mreader As New XmlTextReader(New StringReader(xmlsnippet))
        Dim mcount As Integer = 0

        While mreader.Read
            If Not mreader.Name = "" Then
                mcount += 1
                If mcount > 15 Then
                    If mreader.Name = "uri" Then
                        ' Assign the trimmed value so the comparison below sees it.
                        authorUri = Trim(mreader.ReadInnerXml)
                        If authorUri = "http://www.xxxxxxxx.com/" Then
                            authorUri = ""
                        End If
                    End If
                    If mreader.Name = "name" Then
                        author = mreader.ReadInnerXml
                        If author = "xxxxxx" Then
                            author = ""
                        End If
                    End If
                    If mreader.Name = "content" Then
                        htext = mreader.ReadInnerXml
                    End If
                    If mreader.Name = "link" Then
                        Dim address As String = mreader.ReadOuterXml
                        If address.Contains("related") Then
                            himage = linkPattern.Match(address).Groups("Link").Value
                        ElseIf address.Contains("alternate") Then
                            authorUri = linkPattern.Match(address).Groups("Link").Value
                        End If
                    End If
                    If mreader.Name = "subtitle" Then
                        hsubtitle = mreader.ReadInnerXml
                    End If
                End If
            End If
        End While
    End Using

Catch ex As Exception
    ' Malformed XML surfaces here (e.g. as XmlException); log it and leave the method.
    appLogs.constructLog(ex.Message.ToString, True, True)
    Exit Sub
End Try

事实上,StackOverflowException 每次出现在不同的代码行上,但这种错误只在使用 HtmlAgilityPack 时才会发生。我在另一个方法中使用 XmlDocument 和 XPathNavigator 解析 XML,它通常工作正常;只有在遇到格式错误的 XML 时,我才会改用上面这个方法。我已经设置了异常捕获,打算把有问题的 XML 移动到一个文件夹后退出此方法,但我无法捕获这些异常——或者说,有办法捕获吗?

错误出现的另一处代码:

public string Name
{
   get
   {
     if (_name == null)
     {
       Name = _ownerdocument.Text.Substring(_namestartindex, _namelength);
     }
      return _name != null ? _name.ToLower() : string.Empty;

错误位于 HtmlNode.cs 文件中上述代码段的最后一行。调用堆栈窗口的顶部显示为:

HtmlAgilityPack.dll!HtmlAgilityPack.HtmlNode.Name.get() Line 432 + 0x21 bytes
4

0 回答 0