我正在使用 Acrobat SDK 从 PDF 文件中提取文本。
页面正文的提取工作正常。
但是我的代码无法提取注释(批注)中的文本。
是否可以使用 Acrobat SDK 从评论/注释中提取文本?
我知道我可以使用 iTextSharp 并且效果很好,但我只想使用一个库。
代码如下:
''' <summary>
''' Extracts the page text of a PDF file through the Acrobat COM (IAC) automation objects.
''' </summary>
''' <param name="strFileName">Full path of the PDF file to open.</param>
''' <param name="strTxtFromFile">Existing text that the extracted text is appended to.</param>
''' <returns>
''' <paramref name="strTxtFromFile"/> followed by the text of every readable page, or Nothing when
''' the document cannot be opened or the highlight range cannot be created.
''' </returns>
''' <remarks>
''' Only text reachable through a page text selection is collected; text stored in
''' comments/annotations is not returned by this routine.
''' </remarks>
Public Shared Function AdobePdfParse(strFileName As String, strTxtFromFile As String) As String
    'Note: A reference to the Adobe Acrobat type library must be set in the project references!
    'Note! This only works with Acrobat Pro installed on your PC, will not work with Reader

    ' Upper bound of text elements requested per page (offset 0, length 9000), as in the original code.
    Const MaxTextElementsPerPage As Integer = 9000

    Dim AcroApp As CAcroApp = Nothing
    Dim AcroAVDoc As CAcroAVDoc = Nothing
    Dim AcroPDDoc As CAcroPDDoc = Nothing
    Dim docIsOpen As Boolean = False

    ' A StringBuilder avoids re-copying the whole result for every text element.
    Dim sbTxtFromFile As New System.Text.StringBuilder(strTxtFromFile)

    Try
        AcroApp = CreateObject("AcroExch.App")
        AcroAVDoc = CreateObject("AcroExch.AVDoc")

        ' The second argument is the temporary window title; an empty string keeps the default.
        If Not AcroAVDoc.Open(strFileName, "") Then Return Nothing
        docIsOpen = True

        AcroPDDoc = AcroAVDoc.GetPDDoc()
        Dim iNumPages As Integer = AcroPDDoc.GetNumPages()

        For i As Integer = 0 To iNumPages - 1
            Dim page As Object = Nothing
            Dim hiliteList As Object = Nothing
            Dim textSelect As Object = Nothing

            Try
                page = AcroPDDoc.AcquirePage(i)
                hiliteList = CreateObject("AcroExch.HiliteList")
                If Not CBool(hiliteList.Add(0, MaxTextElementsPerPage)) Then Return Nothing

                textSelect = page.CreatePageHilite(hiliteList)

                ' No selection is returned for pages without selectable text.
                If textSelect Is Nothing Then Continue For

                Dim textCount As Integer = CInt(textSelect.GetNumText())
                For j As Integer = 0 To textCount - 1
                    sbTxtFromFile.Append(CStr(textSelect.GetText(j)))
                Next
            Catch ex As Exception
                ' Best effort: protected or unreadable pages are skipped instead of aborting
                ' the whole extraction (this replaces the former On Error Resume Next).
                System.Diagnostics.Debug.WriteLine("AdobePdfParse: page " & i & " skipped: " & ex.Message)
            Finally
                ReleaseAcrobatObject(textSelect)
                ReleaseAcrobatObject(hiliteList)
                ReleaseAcrobatObject(page)
            End Try
        Next

        Return sbTxtFromFile.ToString()
    Finally
        If docIsOpen Then
            Try
                ' A positive bNoSave closes without saving, so no save prompt can block the caller.
                AcroAVDoc.Close(1)
            Catch ex As Exception
                System.Diagnostics.Debug.WriteLine("AdobePdfParse: closing the document failed: " & ex.Message)
            End Try
        End If

        ReleaseAcrobatObject(AcroPDDoc)
        ReleaseAcrobatObject(AcroAVDoc)
        ReleaseAcrobatObject(AcroApp)
    End Try
End Function

''' <summary>
''' Releases the runtime callable wrapper of a COM object; ignores Nothing and non-COM objects.
''' </summary>
Private Shared Sub ReleaseAcrobatObject(comObject As Object)
    If comObject IsNot Nothing AndAlso System.Runtime.InteropServices.Marshal.IsComObject(comObject) Then
        System.Runtime.InteropServices.Marshal.ReleaseComObject(comObject)
    End If
End Sub