我有一个包含大约 52,000 个链接的文件。我想逐个打开这些链接,读取页面中的一些 meta 值,并把它们写入一个文本文件。
代码看起来很简单,但程序一旦开始运行,内存占用就会缓慢攀升,直到耗尽。我先打开文件,把链接读入一个列表,然后循环遍历这个列表。
下面是我使用的代码,希望有助于诊断这个问题。
''' <summary>
''' Form Load handler: subscribes ScrapeDocument to the browser's
''' DocumentCompleted event, suppresses script-error dialogs, and fills
''' LinkList from the file named by LinkFileName.
''' </summary>
''' <param name="sender">Event source (unused).</param>
''' <param name="e">Event data (unused).</param>
Private Sub Bob_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
    AddHandler wbScrape.DocumentCompleted, AddressOf ScrapeDocument
    wbScrape.ScriptErrorsSuppressed = True

    For Each line As String In System.IO.File.ReadAllLines(LinkFileName)
        ' Blank or whitespace-only lines (e.g. a trailing newline in the file)
        ' are not links: queuing them would inflate the count and make
        ' New Uri(...) throw when they are later reached.
        Dim trimmed As String = line.Trim()
        If trimmed.Length > 0 Then LinkList.Add(trimmed)
    Next

    lblCount.Text = LinkList.Count.ToString
End Sub
''' <summary>
''' Click handler for Button1: starts the scrape by calling GetScrapePage.
''' </summary>
''' <param name="sender">Event source (unused).</param>
''' <param name="e">Event data (unused).</param>
Private Sub Button1_Click(sender As System.Object, e As System.EventArgs) Handles Button1.Click
    Me.GetScrapePage()
End Sub
''' <summary>
''' Points the browser at the first usable link in LinkList and refreshes the
''' remaining-links label. Does nothing (beyond updating the label) when no
''' usable link is left.
''' </summary>
Private Sub GetScrapePage()
    Dim target As Uri = Nothing

    ' Entries that are not absolute URIs are dropped here instead of letting
    ' New Uri(...) throw and abort the whole run on one malformed line.
    ' An empty list simply falls through with target still Nothing.
    Do While LinkList.Count > 0
        Link = LinkList.First.ToString
        If Uri.TryCreate(Link, UriKind.Absolute, target) Then Exit Do
        LinkList.Remove(Link)
    Loop

    lblCount.Text = LinkList.Count.ToString

    ' Assigning Url starts the navigation; completion is reported through the
    ' browser's DocumentCompleted event.
    If target IsNot Nothing Then wbScrape.Url = target
End Sub
''' <summary>
''' DocumentCompleted handler: reads the og:id and og:description META values
''' from the loaded page, appends one "url,id,description" line to
''' ExportFileName when the URL contains "foo", then removes the current Link
''' from LinkList and moves on to the next one (or reports completion).
''' </summary>
''' <param name="sender">Event source (unused).</param>
''' <param name="e">Event data (unused).</param>
Private Sub ScrapeDocument(ByVal sender As Object, ByVal e As WebBrowserDocumentCompletedEventArgs)
    ' Static so the value survives between calls; as an ordinary local it was
    ' reset to "1st" on every event and the duplicate check could never fire.
    Static ProductLineOutputPrevious As String = "1st"
    ' Upper bound on how many META elements are inspected per page.
    Const MaxMetaTags As Integer = 100

    ' NOTE(review): DocumentCompleted may be raised more than once per page
    ' (e.g. for frames) - confirm whether a ReadyState check is wanted here.
    Dim doc As HtmlDocument = wbScrape.Document
    If doc IsNot Nothing AndAlso wbScrape.Url IsNot Nothing Then
        Dim parent_url As String = wbScrape.Url.ToString()
        Dim og_id As String = ""
        Dim og_description As String = ""
        Dim i As Integer = 0

        For Each elem As HtmlElement In doc.GetElementsByTagName("META")
            i = i + 1
            Dim NameStr As String = elem.GetAttribute("name")
            Dim PropertyStr As String = elem.GetAttribute("property")

            ' String.Equals is null-safe and OrdinalIgnoreCase is culture-independent,
            ' unlike the previous ToLower().Equals(...) on possibly-Nothing strings.
            If String.Equals(NameStr, "og:id", StringComparison.OrdinalIgnoreCase) Then
                og_id = elem.GetAttribute("content")
            End If
            If String.Equals(PropertyStr, "og:description", StringComparison.OrdinalIgnoreCase) Then
                ' NOTE(review): embedded double quotes in the content are not escaped - confirm
                ' whether the consumer of the export file needs that.
                og_description = Chr(34) & elem.GetAttribute("content") & Chr(34)
            End If

            ' Stop at the first description found, or after MaxMetaTags elements.
            If og_description <> "" OrElse i >= MaxMetaTags Then Exit For
        Next

        Dim ProductLineOutput As String = parent_url & "," & og_id & "," & og_description

        ' Skip a line identical to the one produced by the previous event.
        If ProductLineOutputPrevious <> ProductLineOutput AndAlso parent_url.Contains("foo") Then
            ' Using closes the writer even if WriteLine throws.
            Using sw As StreamWriter = File.AppendText(ExportFileName)
                sw.WriteLine(ProductLineOutput)
            End Using
        End If
        ProductLineOutputPrevious = ProductLineOutput
    End If

    ' Advance the queue regardless of whether this page yielded any output.
    LinkList.Remove(Link)
    If LinkList.Count > 0 Then
        GetScrapePage()
    Else
        MsgBox("Complete")
        wbScrape.Dispose()
    End If
End Sub