这个程序基本上去一个网站并获取特定 div 标签中的所有链接。然后它导航到每个链接并获取这些页面中的链接。
但是,在成功获得第一个链接并到达第一个网站后,它会重新检查 windows 窗体数据并将类变量重置为其原始值,从而丢失所有链接。
为什么它会重新加载 Windows 表单数据,我如何保留以前的数据?
Imports System.Text.RegularExpressions
Public Class Form
Private stage As String = "Getting Page Links"
Dim PageUrls() As String = {}
Dim PageHtml() As String = {}
Private Sub Form_Load(sender As Object, e As System.EventArgs) Handles Me.Load
WebBrowser.Navigate("websiteurlhidden")
End Sub
Private Sub WebBrowser_DocumentCompleted(sender As Object, e As System.Windows.Forms.WebBrowserDocumentCompletedEventArgs) Handles WebBrowser.DocumentCompleted
If WebBrowser.ReadyState = WebBrowserReadyState.Complete Then
Try
Select Case stage
Case "Getting Page Links"
Dim htmlDocument As HtmlDocument = Me.WebBrowser.Document
Dim htmlElementCollection As HtmlElementCollection = htmlDocument.GetElementsByTagName("DIV")
For Each htmlElement As HtmlElement In HtmlElementCollection
Dim imgUrl As String = htmlElement.GetAttribute("classname")
If imgUrl = " nine " Then
Dim linkIndex As Integer = 0
Dim index2 As Integer = 0
For Each link As HtmlElement In htmlElement.GetElementsByTagName("a")
If linkIndex >= 26 Then
If Not String.IsNullOrEmpty(link.GetAttribute("href")) Then
ReDim Preserve PageUrls(index2)
PageUrls(index2) = link.GetAttribute("href")
' MessageBox.Show(link.GetAttribute("href"))
' MessageBox.Show(PageUrls(linkIndex))
index2 = index2 + 1
End If
End If
linkIndex = linkIndex + 1
Next
For Each str As String In PageUrls
' MessageBox.Show(str)
Next
stage = "Going through pages"
End If
Next
GoThroughPages()
Case "Going through pages"
Dim htmlDocument As HtmlDocument = Me.WebBrowser.Document
Dim htmlElementCollection As HtmlElementCollection = htmlDocument.GetElementsByTagName("DIV")
Dim linkIndex As Integer = 0
For Each htmlElement As HtmlElement In HtmlElementCollection
Dim imgUrl As String = htmlElement.GetAttribute("classname")
If imgUrl = " nine " Then
ReDim Preserve PageHtml(linkIndex)
'need to make permanent.
PageHtml(linkindex) = htmlElement.ToString()
Dim PageDownloadLinks = htmlElement.GetElementsByTagName("a")
End If
End
Next
' GoThroughPages()
Case Else
MessageBox.Show("case else")
End Select
Catch ex As Exception
' MessageBox.Show(ex.Message & " " & ex.ToString)
End Try
End If
End Sub
Private Sub GoThroughPages()
For linkIndex As Integer = 0 To PageUrls.Count - 1
MessageBox.Show(PageUrls(linkIndex))
WebBrowser.Navigate(PageUrls(linkIndex))
Delay(3)
While (WebBrowser.IsBusy)
Application.DoEvents()
End While
Next
End Sub
Sub Delay(ByVal dblSecs As Double)
Const OneSec As Double = 1.0# / (1440.0# * 60.0#)
Dim dblWaitTil As Date
Now.AddSeconds(OneSec)
dblWaitTil = Now.AddSeconds(OneSec).AddSeconds(dblSecs)
Do Until Now > dblWaitTil
Application.DoEvents() ' Allow windows messages to be processed
Loop
End Sub
End Class