0

我正在尝试在 excel/VBA 中搜索术语。我所有的搜索词都在从 A2 开始的 A 列中。我希望搜索结果的超链接出现在它旁边的列中。我已经修改了在 stackoverflow 上找到的代码,它在这方面做得很好,但我希望在“新闻”选项卡中搜索搜索词,而不是谷歌搜索中的“全部”选项卡,因为搜索词与时事相关。我已经做了一些 VBA,但我仍然认为自己是初学者,所以任何帮助将不胜感激。这是我到目前为止的代码。正如您在代码中看到的那样,我只是将“新闻”附加到每个搜索词作为快速解决方案,但结果不如新闻选项卡中的结果好或最新。

Option Explicit
Const TargetItemsQty = 7 ' results for each keyword
Sub Auto_Open()

Call GWebSearchIECtl

End Sub

Sub GWebSearchIECtl()

    Dim objSheet As Worksheet
    Dim objIE As Object
    Dim x As Long
    Dim y As Long
    Dim strSearch As String
    Dim lngFound As Long
    Dim st As String
    Dim colGItems As Object
    Dim varGItem As Variant
    Dim strHLink As String
    Dim strDescr As String
    Dim strNextURL As String

    Set objSheet = Sheets("Sheet1")
    Set objIE = CreateObject("InternetExplorer.Application")
    objIE.Visible = True ' for debug or captcha request cases
    y = 2 ' start searching for the keyword in the first row
    With objSheet
        .Select
        .Range("B2:ZZ10000").Delete ' clear previous results
        .Range("A1").Select
        Do Until .Cells(y, 1) = ""
            x = 2 ' start writing results from column B
            .Cells(y, 1).Select
            strSearch = .Cells(y, 1) ' current keyword
            With objIE
                lngFound = 0
                .Navigate "https://www.google.com/search?q=" & EncodeUriComponent(strSearch) & " news" ' go to first search results page
                Do
                    Do While .Busy Or Not .readyState = 4: DoEvents: Loop ' wait IE
                    Do Until .document.readyState = "complete": DoEvents: Loop ' wait document
                    Do While TypeName(.document.getElementById("res")) = "Null": DoEvents: Loop ' wait [#res] element
                    Set colGItems = .document.getElementById("res").getElementsByClassName("g") ' collection of search result [.g] items
                    For Each varGItem In colGItems ' process each item in collection
                        If varGItem.getElementsByTagName("a").Length > 0 And varGItem.getElementsByClassName("st").Length > 0 Then ' must have hyperlink and description
                            strHLink = varGItem.getElementsByTagName("a")(0).href ' get first hyperlink [a] found in current item
                            strDescr = GetInnerText(varGItem.getElementsByClassName("st")(0).innerHTML) ' get first description [span.st] found in current item
                            lngFound = lngFound + 1
                            With objSheet ' put result into cell
                                .Hyperlinks.Add .Cells(y, x), strHLink, , , strDescr
                                .Cells(y, x).WrapText = True
                                x = x + 1 ' next column
                            End With
                            If lngFound = TargetItemsQty Then Exit Do ' continue with next keyword - necessary quantity of the results for current keyword found
                        End If
                        DoEvents
                    Next
                    If TypeName(.document.getElementById("pnnext")) = "Null" Then Exit Do ' continue with next keyword - no [a#pnnext.pn] next page button exists
                    strNextURL = .document.getElementById("pnnext").href ' get next page url
                    .Navigate strNextURL ' go to next search results page
                Loop
            End With
            y = y + 1 ' next row
        Loop
    End With
    objIE.Quit

    ' google web search page contains the elements:
    ' [div#res] - main search results block
    ' [div.g] - each result item block within [div#res]
    ' [a] - hyperlink ancor(s) within each [div.g]
    ' [span.st] - description(s) within each [div.g]
    ' [a#pnnext.pn] - hyperlink ancor to the next search results page

    Columns("B:B").Select
    Range(Selection, Selection.End(xlToRight)).Select ' formatting column width
    Selection.ColumnWidth = 40
    Range("A1").Select
End Sub

Function EncodeUriComponent(strText As String) As String
    Static objHtmlfile As Object

    If objHtmlfile Is Nothing Then
        Set objHtmlfile = CreateObject("htmlfile")
        objHtmlfile.parentWindow.execScript "function encode(s) {return encodeURIComponent(s)}", "jscript"
    End If
    EncodeUriComponent = objHtmlfile.parentWindow.encode(strText)
End Function

Function GetInnerText(strText As String) As String
    Static objHtmlfile As Object

    If objHtmlfile Is Nothing Then
        Set objHtmlfile = CreateObject("htmlfile")
        objHtmlfile.Open
        objHtmlfile.Write "<body></body>"
    End If
    objHtmlfile.body.innerHTML = strText
    GetInnerText = objHtmlfile.body.innerText
End Function
4

1 回答 1

0

尝试这个。

Sub Gethits()
    Dim url As String, lastRow As Long
    Dim XMLHTTP As Object, html As Object, objResultDiv As Object, objH3 As Object, link As Object
    Dim start_time As Date
    Dim end_time As Date
    Dim var As String
    Dim var1 As Object

    lastRow = Range("A" & Rows.Count).End(xlUp).Row

    Dim cookie As String
    Dim result_cookie As String

    start_time = Time
    Debug.Print "start_time:" & start_time

    For i = 2 To lastRow

        url = "https://www.google.com/search?q=" & Cells(i, 1) & "&rnd=" & WorksheetFunction.RandBetween(1, 10000)

        Set XMLHTTP = CreateObject("MSXML2.serverXMLHTTP")
        XMLHTTP.Open "GET", url, False
        XMLHTTP.setRequestHeader "Content-Type", "text/xml"
        XMLHTTP.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0"
        XMLHTTP.send

        Set html = CreateObject("htmlfile")
        html.body.innerHTML = XMLHTTP.ResponseText
        Set objResultDiv = html.getelementbyid("rso")
        Set var1 = html.getelementbyid("resultStats")
        Cells(i, 2).Value = var1.innerText

        DoEvents
    Next

    end_time = Time
    Debug.Print "end_time:" & end_time

    Debug.Print "done" & "Time taken : " & DateDiff("n", start_time, end_time)
    MsgBox "done" & "Time taken : " & DateDiff("n", start_time, end_time)
End Sub

在此处输入图像描述

于 2018-07-23T21:15:19.187 回答