我整理了一个示例,它会从您指定的页面中提取人员的姓名和职位,并将它们写入工作表 1。
只有在页面底层 HTML 的布局保持不变的情况下,这段代码才能正常工作。它不支持更新现有列表(每次重新读取列表之前,都会先清除工作表 1 上的所有内容)。
要使用此代码,您必须将其放置在新的代码模块(而不是工作表或工作簿部分)中,您可以从代码编辑器或通过 Excel 主窗口中的宏菜单运行它。
' Note: This code requires the following references to be loaded.
' Microsoft HTML Object Library (mshtml.tlb)
' Microsoft Internet Controls (ieframe.dll)
' To add a reference
' In the VBA Code Editor, in the Tools Menu click the References item
' Scroll through the list and ensure that the references are selected
' Press OK and you're done.
' Scrape
' Loads the HSBC leadership page in an Internet Explorer instance, reads the
' name and title out of every element with class "profile-col1", and writes
' them to columns A and B of the active sheet of this workbook.
'
' The sheet's used range is cleared first, so any previous list is discarded.
' Each entry is inserted at row 1, pushing earlier entries down, so the sheet
' ends up in the reverse of the page order.
'
' Any runtime error (for example, the page layout no longer matching the
' Children(1).Children(0/1) structure) is re-raised to the caller after the
' browser instance has been closed.
Sub Scrape()
    Dim Browser As InternetExplorer
    Dim Document As HTMLDocument
    Dim Element As IHTMLElement
    Dim Elements As IHTMLElementCollection
    Dim empName As String
    Dim empTitle As String
    Dim Sheet As Worksheet

    ' Saved error details, so the error can be re-raised after cleanup.
    Dim errNum As Long
    Dim errSrc As String
    Dim errDesc As String

    On Error GoTo Fail

    Set Sheet = ThisWorkbook.ActiveSheet
    Sheet.UsedRange.ClearContents ' Discard the old list

    Set Browser = New InternetExplorer
    Browser.navigate "http://www.hsbc.com/about-hsbc/leadership"

    ' Keep waiting while EITHER condition says the page is not ready.
    ' (Using And here would stop waiting as soon as one of them flipped,
    ' which can hand back a document that has not finished loading.)
    Do While Browser.Busy Or Browser.readyState <> READYSTATE_COMPLETE
        DoEvents
    Loop

    Set Document = Browser.Document
    Set Elements = Document.getElementsByClassName("profile-col1")

    For Each Element In Elements
        ' Second child of the profile block holds the name (first child)
        ' and the title (second child).
        empName = Trim(Element.Children(1).Children(0).innerText)
        empTitle = Trim(Element.Children(1).Children(1).innerText)

        ' Insert a fresh row at the top and fill it in.
        Sheet.Range("A1:B1").Insert xlShiftDown
        Sheet.Cells(1, 1).Value = empName
        Sheet.Cells(1, 2).Value = empTitle
        'Debug.Print "[ name] " & empName
        'Debug.Print "[ title] " & empTitle
    Next Element

CleanUp:
    ' Close the browser process; dropping the reference alone does not
    ' terminate it. Errors during shutdown are ignored on purpose.
    On Error Resume Next
    If Not Browser Is Nothing Then Browser.Quit
    On Error GoTo 0

    Set Elements = Nothing
    Set Document = Nothing
    Set Browser = Nothing

    ' Surface the original failure, if any, now that cleanup is done.
    If errNum <> 0 Then Err.Raise errNum, errSrc, errDesc
    Exit Sub

Fail:
    errNum = Err.Number
    errSrc = Err.Source
    errDesc = Err.Description
    Resume CleanUp
End Sub