0

我不知道这些对象在幕后发生了什么,但我一直在使用它们并取得了巨大的成功。

dim ie As New SHDocVw.InternetExplorer
ie.navigate url
Set oDoc = ie.Document
iT = oDoc.body.innerText

dim oX As New MSXML2.XMLHTTP
dim oDoc As HTMLDocument
oX.Open "GET", url, False
oX.send
rT = oX.responseText

但是对于一个特定的网页,ie 对象挂起,产生错误,使我的硬盘驱动器颠簸并完全让我感到沮丧。

我喜欢 MSXML2 对象,因为它总是快速且无故障。这个特定网页上的问题是它没有返回正确的信息。似乎它获取了错误的页面,或者它没有获取全部内容。

我的最低要求是获取页面的整个 html——我可以从那里解析。

这是我试图获取的页面示例: http ://www.nfl.com/gamecenter/2011090800/2011/REG1/saints@packers?icampaign=GC_schedule_rr#menu=highlights&tab=analyze&analyze=playbyplay

我的目标是让他们全部获得,回到大约 2001 年,当时他们开始保持逐场比赛的记录。他们正在使用的技术令人不安,即阻止了 msxml2。

有什么建议可以让我朝着正确的方向前进吗?

4

1 回答 1

1

这不是最性感的解决方案,但你去:

Option Explicit

Sub fantasyFootball_egghead()
Const READYSTATE_COMPLETE = 4
Const tempDir As String = "C:\Windows\Temp\"

Dim URL$, s_outerhtml$ ''These are strings
Dim IE As Object, IE_Element As Object, IE_HTMLCollection As Object
Dim i_file% ''This is an integer
Dim blnSheetFnd As Boolean
Dim ws As Excel.Worksheet

''Enter your address to navigate to here
URL = "http://www.nfl.com/gamecenter/2011090800/2011/REG1/saints@packers?icampaign=GC_schedule_rr#menu=highlights&tab=analyze&analyze=playbyplay"

''Create an Internet Explorer object if it doesn't exist
If IE Is Nothing Then Set IE = CreateObject("InternetExplorer.Application")

''Make the window visible with true, hidden with false
IE.Visible = True
''navigate to the website
IE.Navigate URL

'' use this loop to make wait until the webpage has loaded
Do While IE.Busy Or IE.readyState <> READYSTATE_COMPLETE
  DoEvents
Loop

''The next line helps ensure that the html has been fully loaded
Application.Wait Now() + TimeValue("0:00:02")
s_outerhtml = IE.document.body.OuterHtml
i_file = FreeFile

''This is a modification of some code I found at www.tek-tips.com <--great resource
''the code saves a temporary copy of the webpage to your temp file
Open tempDir & "\tempFile.htm" For Output As #i_file
Print #i_file, s_outerhtml

Close #i_file

''Creating a "Data" sheet if it doesn't exist
For Each ws In ThisWorkbook.Worksheets
  If ws.Name = "Data" Then blnSheetFnd = True: Exit For
Next

If blnSheetFnd = False Then Sheets.Add: ActiveSheet.Name = "Data"

Sheets("Data").Cells.Clear

''Here is your webquery, using the temporary file as its source
''this is untested in 2003, if it errors out, record a macro
''and replace the property that throws the error with your recorded property
With Sheets("Data").QueryTables.Add(Connection:= _
   "URL;" & tempDir & "tempFile.htm" _
   , Destination:=Range("$A$1"))
   .Name = "Data"
   .FieldNames = True
   .RowNumbers = False
   .FillAdjacentFormulas = False
   .PreserveFormatting = True
   .RefreshOnFileOpen = False
   .BackgroundQuery = True
   .RefreshStyle = xlInsertDeleteCells
   .SavePassword = False
   .SaveData = True
   .AdjustColumnWidth = True
   .RefreshPeriod = 0
   .WebSelectionType = xlEntirePage
   .WebFormatting = xlWebFormattingAll
   .WebPreFormattedTextToColumns = True
   .WebConsecutiveDelimitersAsOne = True
   .WebSingleBlockTextImport = False
   .WebDisableDateRecognition = False
   .WebDisableRedirections = False
   .Refresh BackgroundQuery:=False
End With

''delete the temporary file
Kill tempDir & "\tempFile.htm"

IE.Quit
Set IE = Nothing
Set IE_HTMLCollection = Nothing

End Sub

如果你把它放在一个循环中,只要确保你删除了查询表,否则当连接太多时,excel会停止运行。

Sub delete_qryTables()
Dim qt As QueryTable
Dim qts As QueryTables

Set qts = ThisWorkbook.Worksheets("Data").QueryTables
For Each qt In qts
    qt.Delete
Next

End Sub
于 2012-08-19T01:38:54.830 回答