0

我不确定为什么我的代码不起作用(从我试图从中提取信息的网站的 HTMLDoc 中返回公司名称、电话号码和联系电话。你能帮助确定我做错了什么吗(很可能是IHTMLElement 和 IHTMLElementCollection 数据类型,和/或通过 getElementsByTagName、getElementsByClassName 等访问 HTML。谢谢!

Option Explicit

Sub FinalMantaSub()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As MSHTML.HTMLDocument

IE.Visible = False
IE.navigate "https://www.manta.com/search?search_source=business&search=general+hospitals&search_location=Dallas+TX&pt=32.7825%2C-96.8207"

Do While IE.READYSTATE <> READYSTATE_COMPLETE
DoEvents
Loop

Set HTMLDoc = IE.document

Range("A3").Value = "Name"
Range("B3").Value = "Address"
Range("C3").Value = "Phone"

'variables to output on excel sheet
Dim BusinessNameFinal As String
Dim BusinessAddressFinal As String
Dim BusinessPhoneFinal As String

'variables used to create final BusinessAddress variable
Dim streetAddress As IHTMLElement
Dim addressLocality As IHTMLElement
Dim addressRegion As IHTMLElement
Dim postalCode As IHTMLElement

Dim itemprop As String
Dim itemprop2 As String

Dim BusinessNameCollection As IHTMLElementCollection
Dim BusinessName As IHTMLElement
Dim BusinessAddressCollection As IHTMLElementCollection
Dim BusinessAddress As IHTMLElement
Dim BusinessPhoneCollection As IHTMLElementCollection
Dim BusinessPhone As IHTMLElement

Dim RowNumber As Long

'get ready for business name looping
RowNumber = 4
Set BusinessName = HTMLDoc.getElementsByClassName("media-heading text-primary h4")(0).getElementsByTagName("strong").innerText
Set BusinessNameCollection = BusinessName.all

    'loop for business names
    For Each BusinessName In BusinessNameCollection
        Cells(RowNumber, 1).Value = BusinessName
        RowNumber = RowNumber + 1
    Next BusinessName

'get ready for business address looping
RowNumber = 4
itemprop = HTMLDoc.getElementsByClassName("mvm mhn").getElementsByTagName("span").getAttribute("itemprop")
    If itemprop = "streetAddress" Then
        Set streetAddress = HTMLDoc.getElementsByClassName("mvm mhn").getElementsByTagName("span").innerText
    ElseIf itemprop = "addressLocality" Then
        Set addressLocality = HTMLDoc.getElementsByTagName("span").innerText
    ElseIf itemprop = "addressRegion" Then
        Set addressRegion = HTMLDoc.getElementsByTagName("span").innerText
    ElseIf itemprop = "postalCode" Then
        Set postalCode = HTMLDoc.getElementsByTagName("span").innerText
    End If
Set BusinessAddress = streetAddress & addressLocality & addressRegion & postalCode
Set BusinessAddressCollection = BusinessAddress.all

    'loop for business addresses
    For Each BusinessAddress In BusinessAddressCollection
        BusinessAddress = streetAddress & vbNewLine & addressLocality & ", " & addressRegion & " " & postalCode
        Cells(RowNumber, 2).Value = BusinessAddress
        RowNumber = RowNumber + 1
    Next BusinessAddress

'get ready for business phone looping
RowNumber = 4
itemprop2 = HTMLDoc.getElementsByClassName("hidden-device-xs")(0).getAttribute("itemprop")
    If itemprop2 = "telephone" Then
        BusinessPhone = HTMLDoc.getElementsByClassName("hidden-device-xs")(0).getElementsByTagName("strong").innerText
    End If
Set BusinessPhone = HTMLDoc.getElementsByClassName("hidden-device-xs")(0).getElementsByTagName("strong").innerText
Set BusinessPhoneCollection = BusinessPhone.all

    'loop for business phones
    For Each BusinessPhone In BusinessPhoneCollection
        Cells(RowNumber, 3).Value = BusinessPhone
        RowNumber = RowNumber + 1
    Next BusinessPhone

Range("A1").Activate
Set HTMLDoc = Nothing

 'do some final formatting
 Range("A3").CurrentRegion.WrapText = False
 Range("A3").CurrentRegion.EntireColumn.AutoFit
 Range("A1:C1").EntireColumn.HorizontalAlignment = xlCenter
 Range("A1:D1").Merge
 Range("A1").Value = "Manta.com Business Contacts"
 Range("A1").Font.Bold = True
 Application.StatusBar = ""
 MsgBox "Done!"

 End Sub
4

1 回答 1

0

这将提取信息。您还没有循环代码中的所有结果页面或提到它,所以我已经设置它来向您展示如何执行结果的第一页。让我知道这是怎么回事。

代码:

Option Explicit

Public Sub FinalMantaSub()     '<== Can't have ad blocker enabled for this site

    Dim IE As New SHDocVw.InternetExplorer
    Dim HTMLDoc As MSHTML.HTMLDocument

    IE.Visible = True
    IE.navigate "https://www.manta.com/search?search_source=business&search=general+hospitals&search_location=Dallas+TX&pt=32.7825%2C-96.8207"

    Do While IE.readyState <> READYSTATE_COMPLETE
        DoEvents
    Loop

    Set HTMLDoc = IE.document

    Dim c As Object, i As Long

    Set c = HTMLDoc.querySelectorAll("div.media-body")

    Do While Not c(i) Is Nothing
        Debug.Print "Result #" & i + 1
        Debug.Print vbNewLine
        Debug.Print "Name: " & c(i).querySelector("[itemprop=""name""]").innerText
        Debug.Print "Address: " & c(i).querySelector("[itemprop=""address""]").innerText
        Debug.Print "Phone: " & c(i).querySelector("[itemprop=""telephone""]").innerText
        Debug.Print String$(20, Chr$(61))
        i = i + 1
    Loop
    IE.Quit
End Sub

输出快照:

快照

更新:

有大量的结果,但你可以有一个如下的外循环。然后,您可以将上述内容转换为被调用的子对象。

    Dim arr() As String, pageNo As Long
    arr = Split(HTMLDoc.querySelector(".pagination.pagination-md.mll a").href, "&pt")
    pageNo = 1

    Do While Err.Number = 0

        On Error GoTo Errhand:

        Dim url As String
        url = Split(arr(0), "&")(0) & "&pg=" & pageNo & "&pt" & arr(1)
        Debug.Print url
        IE.navigate url
        Do While IE.readyState <> READYSTATE_COMPLETE
            DoEvents
        Loop
        pageNo = pageNo + 1
    Loop

Errhand:
    Debug.Print "Stopped after " & pageNo & " pages."
于 2018-05-03T13:19:57.860 回答