我发布了第二个答案,因为我相信我的第一个答案对于许多类似的应用程序来说已经足够了,因此在这种情况下它不起作用。
为什么其他方法失败:
- 方法:这会引发一个新窗口,该
.Click
窗口需要用户在运行时输入,似乎无法使用WinAPI
来控制此窗口。或者,至少不是我能确定的任何方式。代码执行.Click
在线停止,直到用户手动干预,没有办法使用aGoTo
或aWait
或任何其他方法来规避这种行为。
- 使用
WinAPI
函数直接下载源文件是行不通的,因为按钮的 URL 不包含文件,而是动态提供文件的 js 函数。
这是我提出的解决方法:
您可以读取网页.body.InnerText
,将其写入纯文本/csv 文件FileSystemObject
,然后使用Regular Expressions
字符串操作的组合,将数据解析为正确分隔的 CSV 文件。
Sub WebDataExtraction()
Dim url As String
Dim fName As String
Dim lnText As String
Dim varLine() As Variant
Dim vLn As Variant
Dim newText As String
Dim leftText As String
Dim breakTime As Date
'## Requires reference to Microsoft VBScript Regular Expressions 5.5
Dim REMatches As MatchCollection
Dim m As Match
'## Requires reference to Microsoft Internet Controls
Dim IeApp As InternetExplorer
'## Requires reference to Microsoft HTML object library
Dim IeDoc As HTMLDocument
Dim ele As HTMLFormElement
'## Requires reference to Microsoft Scripting Runtime
Dim fso As FileSystemObject
Dim f As TextStream
Dim ln As Long: ln = 1
breakTime = DateAdd("s", 60, Now)
url = "http://www.bmreports.com/bsp/BMRSSystemData.php?pT=DDAD&zT=N&dT=NRT"
Set IeApp = CreateObject("InternetExplorer.Application")
With IeApp
.Visible = True
.Navigate url
Do Until .ReadyState = 4
Loop
Set IeDoc = .Document
End With
'Wait for the data to display on the page
Do
If Now >= breakTime Then
If MsgBox("The website is taking longer than usual, would you like to continue waiting?", vbYesNo) = vbNo Then
GoTo EarlyExit
Else:
breakTime = DateAdd("s", 60, Now)
End If
End If
Loop While Trim(IeDoc.body.innerText) = "XML CSV Please Wait Data Loading Sorting"
'## Create the text file
fName = ActiveWorkbook.Path & "\exported-csv.csv"
Set fso = CreateObject("Scripting.FileSystemObject")
Set f = fso.OpenTextFile(fName, 2, True, -1)
f.Write IeDoc.body.innerText
f.Close
Set f = Nothing
'## Read the text file
Set f = fso.OpenTextFile(fName, 1, False, -1)
Do
lnText = f.ReadLine
'## The data starts on the 4th line in the InnerText.
If ln >= 4 Then
'## Return a collection of matching date/timestamps to which we can parse
Set REMatches = SplitLine(lnText)
newText = lnText
For Each m In REMatches
newText = Replace(newText, m.Value, ("," & m.Value & ","), , -1, vbTextCompare)
Next
'## Get rid of consecutive delimiters:
Do
newText = Replace(newText, ",,", ",")
Loop While InStr(1, newText, ",,", vbBinaryCompare) <> 0
'## Then use some string manipulation to parse out the first 2 columns which are
' not a match to the RegExp we used above.
leftText = Left(newText, InStr(1, newText, ",", vbTextCompare) - 1)
leftText = Left(leftText, 10) & "," & Right(leftText, Len(leftText) - 10)
newText = Right(newText, Len(newText) - InStr(1, newText, ",", vbTextCompare))
newText = leftText & "," & newText
'## Store these lines in an array
ReDim Preserve varLine(ln - 4)
varLine(ln - 4) = newText
End If
ln = ln + 1
Loop While Not f.AtEndOfStream
f.Close
'## Re-open the file for writing the delimited lines:
Set f = fso.OpenTextFile(fName, 2, True, -1)
'## Iterate over the array and write the data in CSV:
For Each vLn In varLine
'Omit blank lines, if any.
If Len(vLn) <> 0 Then f.WriteLine vLn
Next
f.Close
EarlyExit:
Set fso = Nothing
Set f = Nothing
IeApp.Quit
Set IeApp = Nothing
End Sub
Function SplitLine(strLine As String) As MatchCollection
'returns a RegExp MatchCollection of Date/Timestamps found in each line
'## Requires reference to Microsoft VBScript Regular Expressions 5.5
Dim RE As RegExp
Dim matches As MatchCollection
Set RE = CreateObject("vbscript.regexp")
With RE
.MultiLine = False
.Global = True
.IgnoreCase = True
'## Use this RegEx pattern to parse the date & timestamps:
.Pattern = "(19|20)\d\d[-](0[1-9]|1[012])[-](0[1-9]|[12][0-9]|3[01])[ ]\d\d?:\d\d:\d\d"
End With
Set matches = RE.Execute(strLine)
Set SplitLine = matches
End Function