1

我正在尝试从政府网站中提取一个数字,我已经做了很多谷歌搜索,但我有点迷失了想法,我下面的代码返回了一个数字,但这不是我想要得到的数字,我不完全确定为什么。

我想从“按地区划分的病例(整个大流行)”表“洛杉矶上层”部分和“海上绍森德”病例编号中减去该数字。

https://coronavirus.data.gov.uk/details/cases

我从网上某处偷了这段代码,并试图用我在网站 F12 部分找到的班级编号进行复制。

Sub ExtractLastValue()

Set objIE = CreateObject("InternetExplorer.Application")

objIE.Top = 0
objIE.Left = 0
objIE.Width = 800
objIE.Height = 600

objIE.Visible = True

objIE.Navigate ("https://coronavirus.data.gov.uk/details/cases")

Do
DoEvents
Loop Until objIE.readystate = 4

MsgBox objIE.document.getElementsByClassName("sc-bYEvPH khGBIg govuk-table__cell govuk-table__cell--numeric ")(0).innerText

Set objIE = Nothing

End Sub


4

1 回答 1

0

数据来自官方 API,当您单击上层面板时,会在该页面上动态返回 json 响应。


在此处查看并使用 API 指南:

https://coronavirus.data.gov.uk/details/developers-guide


您可以按照 API 文档中的指导进行直接 xhr 请求,然后使用 json 解析器来处理响应。对于您的请求,它将类似于以下内容:

https://coronavirus.data.gov.uk/api/v1/data?filters=areaName=Southend-on-Sea&areaType=utla&latestBy=cumCasesByPublishDate&structure=
{"date":"date", "areaName":"areaName","cumCasesByPublishDate":"cumCasesByPublishDate",
"cumCasesByPublishDateRate":"cumCasesByPublishDateRate"}

XHR:

使用 jsonconverter.bas 作为 json 解析器的工作示例

Option Explicit

Public Sub GetCovidNumbers()
    Dim http As Object, json As Object

    Set http = CreateObject("MSXML2.XMLHTTP")

    With http
        .Open "GET", "https://coronavirus.data.gov.uk/api/v1/data?filters=areaName=Southend-on-Sea&areaType=utla&latestBy=cumCasesByPublishDate&structure={""date"":""date"",""areaName"":""areaName"",""cumCasesByPublishDate"":""cumCasesByPublishDate"",""cumCasesByPublishDateRate"":""cumCasesByPublishDateRate""}", False
        .setRequestHeader "User-Agent", "Mozilla/5.0"
        .send
        Set json = JsonConverter.ParseJson(.responseText)("data")(1)
    End With
    With ActiveSheet
        Dim arr()
        arr = json.Keys
        .Cells(1, 1).Resize(1, UBound(arr) + 1) = arr
        arr = json.Items
        .Cells(2, 1).Resize(1, UBound(arr) + 1) = arr
    End With
End Sub

Json 库(用于上述解决方案):

我使用 jsonconverter.bas。从这里下载原始代码并添加到名为 JsonConverter 的标准模块中。然后,您需要转到 VBE > 工具 > 参考 > 添加对 Microsoft Scripting Runtime 的引用。从复制的代码中删除顶部的属性行。


IE浏览器:

您可以做一个更慢、更复杂的 Internet Explorer 解决方案,您需要在存在时选择 utla 选项,然后从表中选择所需的值:

Option Explicit

Public Sub GetCovidNumbers()
    'Tools references Microsoft Internet Controls and Microsoft HTML Object Library
    
    Dim ie As SHDocVw.InternetExplorer, t As Date, ele As Object
    Const MAX_WAIT_SEC As Long = 10
    
    Set ie = New SHDocVw.InternetExplorer
    
    With ie
        .Visible = True
        .Navigate2 "https://coronavirus.data.gov.uk/details/cases"
        While .Busy Or .ReadyState <> READYSTATE_COMPLETE: DoEvents: Wend
        
        t = Timer 'timed loop for element to be present to click on (to get utla)
        Do
            On Error Resume Next
            Set ele = .Document.querySelector("#card-cases_by_area_whole_pandemic [aria-label='Upper tier LA']")
            On Error GoTo 0
            If Timer - t > MAX_WAIT_SEC Then Exit Do
        Loop While ele Is Nothing

        If ele Is Nothing Then Exit Sub
        
        ele.Click
        
        While .Busy Or .ReadyState <> READYSTATE_COMPLETE: DoEvents: Wend
        
        Dim table As MSHTML.HTMLTable, datetime As String, result()
        
        Set table = .Document.querySelector("table[download='cumCasesByPublishDate,cumCasesByPublishDateRate']")
        datetime = .Document.querySelector("time").getAttribute("datetime")
        result = GetDataForUtla("Southend-on-Sea", datetime, table)
        
        With ActiveSheet
            .Cells(1, 1).Resize(1, 4) = Array("Datetime", "Area", "Cases", "Rate per 100,000 population")
            .Cells(2, 1).Resize(1, UBound(result) + 1) = result
        End With
        .Quit
    End With
    
End Sub

Public Function GetDataForUtla(ByVal utla As String, ByVal datetime As String, ByVal table As MSHTML.HTMLTable) As Variant
    Dim row As MSHTML.HTMLTableRow, i As Long

    For Each row In table.Rows
        
        If InStr(row.outerHTML, utla) > 0 Then
            Dim arr(4)
            arr(0) = datetime
            For i = 0 To 2
                arr(i + 1) = row.Children(i).innerText
            Next
            GetDataForUtla = arr
            Exit Function
        End If
    Next
    GetDataForUtla = Array("Not found")
End Function

参考:

  1. https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_Selectors
  2. https://developer.mozilla.org/en-US/docs/Web/API/Document/querySelector
于 2021-01-07T18:10:07.083 回答