3

我必须使用 excel 宏打开谷歌搜索页面。在我在 excel 中提供搜索参数后,我能够成功打开谷歌搜索页面。但是,我的任务是打开第一个返回的搜索结果页面并在该页面中进行一些数据提取。我使用了下面的代码。

假设如果我搜索“ Sachin Tendulkar wiki ”,我应该能够打开搜索结果中的第一页。到目前为止,我的代码如下。

Dim ie As InternetExplorer
Dim RegEx As RegExp, RegMatch As MatchCollection
Dim MyStr As String
Dim pDisp As Object
Set ie = New InternetExplorer
Set RegEx = New RegExp
Dim iedoc As Object

'Search google for "something"
ie.Navigate "http://www.google.com.au/search?hl=en&q=sachin+tendulkar+wiki&meta="

'Loop unitl ie page is fully loaded
Do Until ie.ReadyState = READYSTATE_COMPLETE
Loop



MyStr = ie.Document.body.innertext
Set RegMatch = RegEx.Execute(MyStr)

'If a match to our RegExp searchstring is found then launch this page
If RegMatch.Count > 0 Then
    ie.Navigate RegMatch(0)
    Do Until ie.ReadyState = READYSTATE_COMPLETE
    Loop
         MsgBox "Loaded"
         'show internet explorer
    ie.Visible = True
    'Private Sub ie_DocumentComplete(ByVal pDisp As Object, URL As Variant)
    Set iedoc = ie.Application.Document
    'iedoc.getElementById("divid").Value = "poS0"
    'MsgBox iedoc

    'ie.Navigate iedoc.getelementsbytagname("ol")(0).Children(0).getelementsbytagname("a")(0).href
    ie.Navigate iedoc.getelementsbyclassname("divid")("poS0").href
    Else
    MsgBox "No linkedin profile found"
End If

Set RegEx = Nothing
Set ie = Nothing

我在谷歌搜索页面中查看了页面源。我有一个特定的 div id = "pos0" 这是第一个搜索结果的 id。我必须让 IE 导航到 div id = "pos0" 的页面。我无法在 VBA 中完成这件事。有人可以帮我吗?

谢谢和问候, 拉梅什

4

2 回答 2

4

你有几个问题。首先访问文档对象它ie.Document不是ie.Application.Document。我已经更新了您的代码,以显示如何使用子字符串快速找到第一个 url。

Dim ie As InternetExplorer
Dim RegEx As RegExp, RegMatch As MatchCollection
Dim MyStr As String
Dim pDisp As Object
Set ie = New InternetExplorer
Set RegEx = New RegExp
Dim iedoc As Object

'Search google for "something"
ie.Navigate "http://www.google.com.au/search?hl=en&q=sachin+tendulkar+wiki&meta="

'Loop unitl ie page is fully loaded
Do Until ie.ReadyState = READYSTATE_COMPLETE
Loop



MyStr = ie.Document.body.innertext
Set RegMatch = RegEx.Execute(MyStr)

'If a match to our RegExp searchstring is found then launch this page
If RegMatch.Count > 0 Then
    ie.Navigate RegMatch(0)
    Do Until ie.ReadyState = READYSTATE_COMPLETE
    Loop
         MsgBox "Loaded"
         'show internet explorer
    ie.Visible = True
    'Private Sub ie_DocumentComplete(ByVal pDisp As Object, URL As Variant)
    '****************************************
    'EDITS
    '****************************************
    Set iedoc = ie.Document

    'create a variable to hold the text
    Dim extractedHTML As String
    'start and end points for the substring
    Dim iStart, iEnd As Integer
    'get the element with ID of search - this is where the results start
    extractedHTML = iedoc.getElementById("search").innerHTML
    'find the first href as this will be the first link, add 1 to encompass the quote
    iStart = InStr(1, extractedHTML, "href=", vbTextCompare) + Len("href=") + 1
    'locate the next quote as this will be the end of the href
    iEnd = InStr(iStart, extractedHTML, Chr(34), vbTextCompare)
    'extract the text
    extractedHTML = Mid(extractedHTML, iStart, iEnd - iStart)
    'go to the URL
    ie.Navigate extractedHTML

    '****************************************
    'End EDITS
    '****************************************
    Else
    MsgBox "No linkedin profile found"
End If

Set RegEx = Nothing
Set ie = Nothing
于 2013-02-06T22:55:51.773 回答
2

您可以考虑使用 xmlHTTP 对象而不是使用 IE。
HTTP 请求更简单,更快

下面是示例代码

Sub xmlHttp()

    Dim URl As String, lastRow As Long
    Dim xmlHttp As Object, html As Object, objResultDiv As Object, objH3 As Object, link As Object


    lastRow = Range("A" & Rows.Count).End(xlUp).Row

    For i = 2 To lastRow

        URl = "https://www.google.co.in/search?q=" & Cells(i, 1)

        Set xmlHttp = CreateObject("MSXML2.XMLHTTP")
        xmlHttp.Open "GET", URl, False
        xmlHttp.setRequestHeader "Content-Type", "text/xml"
        xmlHttp.send

        Set html = CreateObject("htmlfile")
        html.body.innerHTML = xmlHttp.ResponseText
        Set objResultDiv = html.getelementbyid("rso")
        Set objH3 = objResultDiv.getelementsbytagname("H3")(0)
        Set link = objH3.getelementsbytagname("a")(0)


        str_text = Replace(link.innerHTML, "<EM>", "")
        str_text = Replace(str_text, "</EM>", "")

        Cells(i, 2) = str_text
        Cells(i, 3) = link.href
    Next
End Sub

在此处输入图像描述

HTH
桑托什

于 2013-07-06T04:14:08.380 回答