0

我有一个包含大约 52,000 个链接的文件。我想逐个打开这些链接,读取页面中的一些 meta 标签值,并把它们写入文本文件。

代码看起来很简单,但一旦开始运行,内存占用就会缓慢攀升,直到耗尽。我的做法是打开文件,把链接读入一个列表,然后循环遍历这个列表。

下面是我尝试过的部分代码,希望有助于诊断我的问题。

    ''' <summary>
    ''' Form load handler: wires the browser's DocumentCompleted event to
    ''' <c>ScrapeDocument</c>, suppresses script error dialogs, and fills
    ''' <c>LinkList</c> from the file named by <c>LinkFileName</c>.
    ''' </summary>
    ''' <param name="sender">Event source (unused).</param>
    ''' <param name="e">Event data (unused).</param>
    Private Sub Bob_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
        AddHandler wbScrape.DocumentCompleted, New WebBrowserDocumentCompletedEventHandler(AddressOf ScrapeDocument)
        wbScrape.ScriptErrorsSuppressed = True

        For Each line As String In System.IO.File.ReadAllLines(LinkFileName)
            ' Blank or whitespace-only lines are not links; queuing them would
            ' make "New Uri(...)" throw later, in the middle of the scrape run.
            Dim candidate As String = line.Trim()
            If candidate.Length > 0 Then
                LinkList.Add(candidate)
            End If
        Next

        lblCount.Text = LinkList.Count.ToString
    End Sub

''' <summary>
''' Click handler for Button1: starts the scrape by navigating to the first
''' queued link via <c>GetScrapePage</c>.
''' </summary>
Private Sub Button1_Click(sender As System.Object, e As System.EventArgs) Handles Button1.Click
    Me.GetScrapePage()
End Sub

''' <summary>
''' Updates the remaining-links label and points the browser at the first
''' entry of <c>LinkList</c>, remembering it in <c>Link</c>.
''' Does nothing beyond refreshing the label when the list is empty.
''' </summary>
Private Sub GetScrapePage()
    lblCount.Text = LinkList.Count.ToString

    ' First() throws InvalidOperationException on an empty sequence, e.g. when
    ' the button is clicked with an empty link file.
    If LinkList.Count = 0 Then
        Return
    End If

    Link = LinkList.First.ToString
    wbScrape.Url = New Uri(Link)
End Sub

''' <summary>
''' DocumentCompleted handler: reads the og:id / og:description META values of
''' the loaded page, appends a "url,id,description" line to
''' <c>ExportFileName</c> when the URL contains "foo" and the line differs from
''' the previously produced one, then removes the current link from
''' <c>LinkList</c> and either navigates to the next link or finishes.
''' </summary>
''' <param name="sender">Event source (unused; the handler works on wbScrape).</param>
''' <param name="e">Event data (unused).</param>
Private Sub ScrapeDocument(ByVal sender As Object, ByVal e As WebBrowserDocumentCompletedEventArgs)
    ' Static so the value survives between invocations; as a plain local it was
    ' reset to "1st" on every call and the duplicate check could never match.
    Static previousLine As String = "1st"

    ' DocumentCompleted is also raised for sub-frames. Only act once the
    ' top-level document has finished, otherwise the link list is advanced
    ' several times per page.
    If wbScrape.ReadyState <> WebBrowserReadyState.Complete Then
        Return
    End If

    Dim parentUrl As String = wbScrape.Url.ToString()
    Dim ogId As String = ""
    Dim ogDescription As String = ""
    Dim inspected As Integer = 0

    If wbScrape.Document IsNot Nothing Then
        For Each elem As HtmlElement In wbScrape.Document.GetElementsByTagName("META")
            inspected += 1

            ' String.Equals is null-safe and the ordinal comparison does not
            ' depend on the current culture's casing rules.
            If String.Equals(elem.GetAttribute("name"), "og:id", StringComparison.OrdinalIgnoreCase) Then
                ogId = elem.GetAttribute("content")
            End If

            If String.Equals(elem.GetAttribute("property"), "og:description", StringComparison.OrdinalIgnoreCase) Then
                ' Wrap the description in double quotes for the CSV-style output.
                ogDescription = Chr(34) & elem.GetAttribute("content") & Chr(34)
            End If

            ' Stop once the description is found, or after 100 META elements.
            If ogDescription <> "" OrElse inspected >= 100 Then
                Exit For
            End If
        Next
    End If

    Dim outputLine As String = parentUrl & "," & ogId & "," & ogDescription

    If outputLine <> previousLine AndAlso parentUrl.Contains("foo") Then
        ' Using disposes (and thereby flushes and closes) the writer even if
        ' WriteLine throws.
        Using writer As StreamWriter = File.AppendText(ExportFileName)
            writer.WriteLine(outputLine)
        End Using
    End If

    previousLine = outputLine

    LinkList.Remove(Link)

    If LinkList.Count > 0 Then
        GetScrapePage()
    Else
        MsgBox("Complete")
        wbScrape.Dispose()
    End If

End Sub
4

1 回答 1

0

您需要使用 WebBrowser 的 DocumentCompleted 事件,而不是循环,并把处理代码放进 DocumentCompleted 事件处理程序中。要处理大量链接,我的做法是把链接存进一个 List(Of T);填充完之后,对第一个元素触发第一次导航。在 DocumentCompleted 中,先移除 List 的第一个元素;如果列表不为空,再导航到新的第一个元素。这样一次只处理一个页面,每次迭代之后资源都会被释放。

于 2013-08-03T18:05:28.517 回答