我正在尝试使用 MSXML6 提取美国专利标题。
在 USPTO 网站上专利文档的全文 html 视图中,专利标题显示为第一个也是唯一一个作为“body”子元素的“font”元素。
这是我的函数不起作用(我没有收到错误;带有公式的单元格只是保持空白)。
有人可以帮我找出问题所在吗?
Function getUSPatentTitle(url As String)
Static colTitle As New Collection
Dim title As String
Dim pageSource As String
Dim xDoc As MSXML2.DOMDocument
Dim xNode As IXMLDOMNode
On Error Resume Next
title = colTitle(url)
If Err.Number <> 0 Then
Set html_doc = CreateObject("htmlfile")
Set xml_obj = CreateObject("MSXML6.XMLHTTP60")
xml_obj.Open "GET", url, False
xml_obj.send
pageSource = xml_obj.responseText
Set xml_obj = Nothing
Set xDoc = New MSXML2.DOMDocument
If Not xDoc.LoadXML(pageSource) Then
Err.Raise xDoc.parseError.ErrorCode, , xDoc.parseError.reason
End If
Set xNode = xDoc.getElementsByTagName("font").Item(1)
title = xNode.Text
If Not title = "" Then colTitle.Add Item:=title, Key:=url
End If
On Error GoTo 0 ' I understand "GoTo" is dangerous coding but copied from somebody and so far haven't thought of a more natural substitute for a GoTo statement
getUSPatentTitle = title
End Function