我需要获取页码才能从 .PDF 文档中的特定页面中提取文本。我正在使用 Excel VBA 函数,该函数利用 Acrobat 类型库 10.0 中的 JSObject
这是代码片段,当我尝试从 Doc 对象引用 pageNum 属性时,代码会出现问题。我试图避免使用 AV 层并仅使用 PD 层,因此我的宏仅在后台运行并且不调用 Acrobat 应用程序。
Function getTextFromPDF_JS(ByVal strFilename As String) As String
Dim pdDoc As New AcroPDDoc
Dim pdfPage As Acrobat.AcroPDPage
Dim pdfBookmark As Acrobat.AcroPDBookmark
Dim jso As Object
Dim BookMarkRoot As Object
Dim vBookmark As Variant
Dim objSelection As AcroPDTextSelect
Dim objHighlight As AcroHiliteList
Dim currPage As Integer
Dim strText As String
Dim BM_flag As Boolean
Dim count As Integer
Dim word As Variant
strText = ""
If (pdDoc.Open(strFilename)) Then
Set jso = pdDoc.GetJSObject
Set BookMarkRoot = jso.BookMarkRoot
vBookmark = jso.BookMarkRoot.Children
'Add a function call to see if a particular bookmark exists within the .PDF
Set pdfBookmark = CreateObject("AcroExch.PDBookmark")
BM_flag = pdfBookmark.GetByTitle(pdDoc, "Title Page")
If (BM_flag) Then
For i = 0 To UBound(vBookmark)
If vBookmark(i).Name = "Title Page" Then
vBookmark(i).Execute
jso.pageNum
Set pdfPage = pdDoc.AcquirePage(pageNum)
Set objHighlight = New AcroHiliteList
objHighlight.Add 0, 10000 ' Adjust this up if it's not getting all the text on the page
Set objSelection = pdfPage.CreatePageHilite(objHighlight)
If Not objSelection Is Nothing Then
For tCount = 0 To objSelection.GetNumText - 1
strText = strText & objSelection.GetText(tCount)
Next tCount
End If
Exit For
End If
pdDoc.Close
End If
End If
getTextFromPDF_JS = strText
End Function