0

这个程序基本上去一个网站并获取特定 div 标签中的所有链接。然后它导航到每个链接并获取这些页面中的链接。

但是,在成功获得第一个链接并到达第一个网站后,它会重新检查 windows 窗体数据并将类变量重置为其原始值,从而丢失所有链接。

为什么它会重新加载 Windows 表单数据,我如何保留以前的数据?

Imports System.Text.RegularExpressions

Public Class Form

Private stage As String = "Getting Page Links"
Dim PageUrls() As String = {}
Dim PageHtml() As String = {}

Private Sub Form_Load(sender As Object, e As System.EventArgs) Handles Me.Load

    WebBrowser.Navigate("websiteurlhidden")


End Sub

Private Sub WebBrowser_DocumentCompleted(sender As Object, e As System.Windows.Forms.WebBrowserDocumentCompletedEventArgs) Handles WebBrowser.DocumentCompleted
    If WebBrowser.ReadyState = WebBrowserReadyState.Complete Then

        Try




            Select Case stage
                Case "Getting Page Links"

                    Dim htmlDocument As HtmlDocument = Me.WebBrowser.Document
                    Dim htmlElementCollection As HtmlElementCollection = htmlDocument.GetElementsByTagName("DIV")

                    For Each htmlElement As HtmlElement In HtmlElementCollection

                        Dim imgUrl As String = htmlElement.GetAttribute("classname")
                        If imgUrl = " nine " Then



                            Dim linkIndex As Integer = 0
                            Dim index2 As Integer = 0
                            For Each link As HtmlElement In htmlElement.GetElementsByTagName("a")
                                If linkIndex >= 26 Then
                                    If Not String.IsNullOrEmpty(link.GetAttribute("href")) Then
                                        ReDim Preserve PageUrls(index2)

                                        PageUrls(index2) = link.GetAttribute("href")
                                        ' MessageBox.Show(link.GetAttribute("href"))
                                        '  MessageBox.Show(PageUrls(linkIndex))
                                        index2 = index2 + 1
                                    End If
                                End If
                                linkIndex = linkIndex + 1
                            Next

                            For Each str As String In PageUrls
                                '          MessageBox.Show(str)
                            Next
                            stage = "Going through  pages"

                        End If

                    Next

                    GoThroughPages()

                Case "Going through  pages"

                    Dim htmlDocument As HtmlDocument = Me.WebBrowser.Document
                    Dim htmlElementCollection As HtmlElementCollection = htmlDocument.GetElementsByTagName("DIV")
                    Dim linkIndex As Integer = 0
                    For Each htmlElement As HtmlElement In HtmlElementCollection

                        Dim imgUrl As String = htmlElement.GetAttribute("classname")
                        If imgUrl = " nine " Then
                            ReDim Preserve PageHtml(linkIndex)
                            'need to make permanent.
                            PageHtml(linkindex) = htmlElement.ToString()

                            Dim PageDownloadLinks = htmlElement.GetElementsByTagName("a")

                        End If
                        End
                    Next

                    ' GoThroughPages()

                Case Else
                    MessageBox.Show("case else")

            End Select

        Catch ex As Exception
            '  MessageBox.Show(ex.Message & " " & ex.ToString)
        End Try
    End If

End Sub

Private Sub GoThroughPages()

    For linkIndex As Integer = 0 To PageUrls.Count - 1


        MessageBox.Show(PageUrls(linkIndex))
        WebBrowser.Navigate(PageUrls(linkIndex))
        Delay(3)
        While (WebBrowser.IsBusy)

            Application.DoEvents()
        End While
    Next

End Sub

Sub Delay(ByVal dblSecs As Double)

    Const OneSec As Double = 1.0# / (1440.0# * 60.0#)
    Dim dblWaitTil As Date
    Now.AddSeconds(OneSec)
    dblWaitTil = Now.AddSeconds(OneSec).AddSeconds(dblSecs)
    Do Until Now > dblWaitTil
        Application.DoEvents() ' Allow windows messages to be processed
    Loop

End Sub

End Class
4

1 回答 1

0

简单的解决方案是更改:

Dim linkIndex As Integer = 0

Static linkIndex As Integer = 0

这将导致linkIndex在调用之间保留其值,并且ReDim Preserve PageHtml(linkIndex)不会在每次调用时重置。

于 2013-02-23T01:33:26.490 回答