1

我目前正在尝试从晨星的表格中抓取某些数据,然后让它循环到下一个代码并重复直到没有更多代码。

目前,它将拉动追踪总回报表中的整个“类别排名”行。我只是想拉 3 个月、6 个月、YTD、1 年、3 年和 5 年。当它完成拉动这些时,它将循环到由导航行中的“Cells(p, 14)”确定的下一个代码。

IE。它检测到“LINKX”在单元格 1、14 中,因此它导航到http://performance.morningstar.com/fund/performance-return.action?t=LINKX®ion=usa&culture=en_US并提取所有“类别中的排名” “追踪总回报”表中的行。我只希望将指定的那些放入指定的单元格位置,然后循环到下一个代码。

我浏览了许多这些线程,使用 excel VBA 我试图从某个股票代码页面中提取关键特定信息,然后循环到下一个股票代码并重复。

Declare PtrSafe Function apiShowWindow Lib "user32" Alias "ShowWindow" _
        (ByVal hwnd As LongPtr, ByVal nCmdShow As LongPtr) As LongPtr
    Global Const SW_MAXIMIZE = 3
    Global Const SW_SHOWNORMAL = 1
    Global Const SW_SHOWMINIMIZED = 2

Sub LinkedInWebScrapeScript()

    Dim objIE As InternetExplorer

    Dim html As HTMLDocument

    Set objIE = GetObject("new:{D5E8041D-920F-45e9-B8FB-B1DEB82C6E5E}")
    objIE.Visible = 1
Dim p As Integer
p = 3

    objIE.navigate ("http://performance.morningstar.com/fund/performance-return.action?t=" & Cells(p, 14) & "&region=usa&culture=en_US")
    Application.Wait Now + #12:00:02 AM#

    While objIE.Busy
        DoEvents
    Wend
    apiShowWindow objIE.hwnd, SW_MAXIMIZE

    For i = 1 To 2
        objIE.document.parentWindow.scrollBy 0, 100000 & i
        Application.Wait Now + #12:00:01 AM#
    Next i

Dim TDelements As IHTMLElementCollection
Dim htmldoc As MSHTML.IHTMLDocument 'Document object
Dim eleColtr As MSHTML.IHTMLElementCollection 'Element collection for tr tags
Dim eleColtd As MSHTML.IHTMLElementCollection 'Element collection for td tags
Dim eleColtd1 As MSHTML.IHTMLElementCollection
Dim eleRow As MSHTML.IHTMLElement 'Row elements
Dim eleCol As MSHTML.IHTMLElement 'Column elements
Set htmldoc = objIE.document 'Document webpage
Set eleColtr = htmldoc.getElementsByTagName("tr") 'Find all tr tags
Set TDelements = htmldoc.getElementsByTagName("table")
'This section populates Excel
i = 0 'start with first value in tr collection


Set eleColtd = htmldoc.getElementsByClassName("r_table3 width955px print97")(0).getElementsByClassName("last")(0).getElementsByClassName("row_data divide") 'get all the td elements in that specific tr

    For Each eleCol In eleColtd 'for each element in the td collection
        Sheets("Sheet2").Range("A1").Offset(i, j).Value = eleCol.innerText 'paste the inner text of the td element, and offset at the same time
        j = j + 1 'move to next element in td collection
    Next eleCol 'rinse and repeat
i = i + 1

p = p + 1

objIE.navigate ("http://performance.morningstar.com/fund/performance-return.action?t=" & Cells(p, 14) & "&region=usa&culture=en_US")

Set eleColtd = htmldoc.getElementsByClassName("r_table3 width955px print97")(0).getElementsByClassName("last")(0).getElementsByClassName("row_data divide") 'get all the td elements in that specific tr

    For Each eleCol In eleColtd 'for each element in the td collection
        Sheets("Sheet2").Range("A1").Offset(i, j).Value = eleCol.innerText 'paste the inner text of the td element, and offset at the same time
        z = z + 1
        j = j + 1 'move to next element in td collection
    Next eleCol 'rinse and repeat


End Sub

它将拉动追踪总回报表上的整个“类别排名”行。我只是想拉 3 个月、6 个月、YTD、1 年、3 年和 5 年。当它完成拉动这些时,它将循环到由导航行中的“Cells(p, 14)”确定的下一个代码。

4

1 回答 1

3

下面显示了一个循环以及如何使用css 选择器选择适当的表格、tbody 然后表格单元格。代码从第 1 行开始从第 N 列读入一个数组。它假定该范围内没有空白单元格(尽管您可以添加一个测试来确定)。

数组上有一个循环,其中包含每个股票代码,并且 url 中的 TICKER 占位符被替换为当前股票代码值。

每月显示选项卡上有一行可以单击。

适当的行通过

Set rankings = .querySelectorAll("#tab-month-end-content .last td")

#tab-month-end-content是一个 id 选择器,它获取正确的选项卡,然后.last是最后一个类名的类选择器tbody(即last),然后td用于指定该tdtbody 中的子单元格。


CSS 选择器:

现代浏览器针对 css 进行了优化。Css 选择器是匹配 html 文档中元素的一种快速方法。Css 选择器通过 querySelector 或querySelectorAll方法应用;在这种情况下,HTMLDocument(即文档)。querySelector返回单个节点:css 选择器的第一个匹配项;querySelectorAll返回所有匹配项目的 nodeList - 然后您索引到该 nodeList 以获取特定项目,例如第二个 td 单元格位于索引 1。

查看我们指定的模式:

#tab-month-end-content .last td

第一部分是一个id 选择器#它通过 id 选择一个元素

#tab-month-end-content

当应用于页面时,这将返回两个匹配项,我们想要第二个

点击图片可放大

在此处输入图像描述

下一部分

.last 

是一个类选择器, ., 用于类名last。这将选择tbody上图中显示的标记子元素。由于只有第二个 id 匹配元素有这个子元素,我们现在正在使用正确的父元素继续并td使用类型选择器选择类型元素

td

上面描述的每个部分之间的空格被称为后代组合器,它们指定如果第二个选择器匹配的元素具有与第一个选择器匹配的祖先元素,即左侧的选择器是选择器匹配由右侧相邻 css 选择器检索到的元素。

我们可以在下一张图片中看到这一点:

点击图片可放大

在此处输入图像描述


VBA:

Option Explicit
Public Sub GetData()
    Dim ie As Object, tickers(), ws As Worksheet, lastRow As Long
    Dim results(), headers(), r As Long, i As Long, url As String

    headers = Array("ticker", "3m", "6m", "ytd", "1y", "3y", "6y")
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    tickers = Application.Transpose(ws.Range("N1:N" & GetLastRow(ws, 14)).Value)
    ReDim results(1 To UBound(tickers), 1 To UBound(headers) + 1)
    Set ie = GetObject("new:{D5E8041D-920F-45e9-B8FB-B1DEB82C6E5E}")
    With ie
        .Visible = True
        For i = LBound(tickers) To UBound(tickers)
            r = r + 1
            url = Replace$("http://performance.morningstar.com/fund/performance-return.action?t=TICKER&region=usa&culture=en_US", "TICKER", tickers(i))
            .Navigate2 url

            While .Busy Or .readyState < 4: DoEvents: Wend

            .document.querySelector("[tabname='#tabmonth']").Click

            Dim rankings As Object
            Do
            Loop While .document.querySelectorAll("#tab-month-end-content .last td").Length = 0 'could add timed loop here

            With .document
                Set rankings = .querySelectorAll("#tab-month-end-content .last td")
                On Error Resume Next
                results(r, 1) = tickers(i)
                results(r, 2) = rankings.item(1).innerText
                results(r, 3) = rankings.item(2).innerText
                results(r, 4) = rankings.item(3).innerText
                results(r, 5) = rankings.item(4).innerText
                results(r, 6) = rankings.item(5).innerText
                results(r, 7) = rankings.item(6).innerText
                On Error GoTo 0
            End With
            Set rankings = Nothing
        Next
        ws.Cells(1, 15).Resize(UBound(results, 1), UBound(results, 2)) = results
        .Quit
    End With
End Sub

Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
    With ws
        GetLastRow = .Cells(.rows.Count, columnNumber).End(xlUp).Row
    End With
End Function

正如@SIM 所提到的,您可以使用xmlhttp并避免浏览器,但不确定您的安全设置是否需要将站点列入白名单。您将需要探索占位符在此处的 url 中是否有效:XNAS:TICKER. 前缀可能因您的股票代码而XNAS异,在这种情况下,您需要适当的字符串,包括 N 列中的前缀,然后将扩展的占位符替换为例如=PLACEHOLDER&region...........

Option Explicit
Public Sub GetData()
    Dim tickers(), ws As Worksheet, lastRow As Long
    Dim results(), headers(), r As Long, i As Long, url As String, html As HTMLDocument
    Set html = New HTMLDocument 'vbe > tools > references > Microsoft HTML Object Library

    headers = Array("ticker", "3m", "6m", "ytd", "1y", "3y", "6y")
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    tickers = Application.Transpose(ws.Range("N1:N" & GetLastRow(ws, 14)).Value)
    ReDim results(1 To UBound(tickers), 1 To UBound(headers) + 1)

    With CreateObject("MSXML2.XMLHTTP")

        For i = LBound(tickers) To UBound(tickers)
            r = r + 1
            url = Replace$("http://performance.morningstar.com/perform/Performance/fund/trailing-total-returns.action?&t=XNAS:TICKER&region=usa&culture=en-US&cur=&ops=clear&s=0P0000J533&ndec=2&ep=true&align=m&annlz=true&comparisonRemove=false&loccat=&taxadj=&benchmarkSecId=&benchmarktype=", "TICKER", tickers(i))
           .Open "GET", url, False
           .setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
           .setRequestHeader "DNT", "1"
           .send
           html.body.innerHTML = .responseText

            Dim rankings As Object
            With html
                Set rankings = .querySelectorAll(".last td")

                On Error Resume Next
                results(r, 1) = tickers(i)
                results(r, 2) = rankings.item(1).innerText
                results(r, 3) = rankings.item(2).innerText
                results(r, 4) = rankings.item(3).innerText
                results(r, 5) = rankings.item(4).innerText
                results(r, 6) = rankings.item(5).innerText
                results(r, 7) = rankings.item(6).innerText
                On Error GoTo 0
            End With
            Set rankings = Nothing
        Next
        ws.Cells(1, 15).Resize(UBound(results, 1), UBound(results, 2)) = results
    End With
End Sub

Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
    With ws
        GetLastRow = .Cells(.rows.Count, columnNumber).End(xlUp).Row
    End With
End Function
于 2019-07-01T17:15:05.850 回答