我尝试使用 tabula 命令行工具将 PDF 转换为 CSV,但运行下面的命令时报错:
java -jar ./tabula-0.9.1-jar-with-dependencies.jar -p all -o data_table.csv file1.pdf
有人能帮忙看一下吗?完整的错误输出如下:
Dec 11, 2016 4:11:31 PM org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage mask
WARNING: Colour key masking isn't supported
Dec 11, 2016 4:11:32 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
WARNING: java.lang.ArrayIndexOutOfBoundsException: 5
java.lang.ArrayIndexOutOfBoundsException: 5
at java.awt.geom.Path2D$Float$TxIterator.currentSegment(Path2D.java:1029)
at technology.tabula.ObjectExtractor.strokeOrFillPath(ObjectExtractor.java:226)
at technology.tabula.ObjectExtractor.strokePath(ObjectExtractor.java:275)
at org.apache.pdfbox.util.operator.pagedrawer.StrokePath.process(StrokePath.java:47)
at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:557)
at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:532)
at org.apache.pdfbox.util.operator.pagedrawer.FillNonZeroAndStrokePath.process(FillNonZeroAndStrokePath.java:52)
at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:557)
at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:532)
at org.apache.pdfbox.util.operator.pagedrawer.CloseFillNonZeroAndStrokePath.process(CloseFillNonZeroAndStrokePath.java:47)
at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:557)
at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:268)
at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:235)
at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:215)
at technology.tabula.ObjectExtractor.drawPage(ObjectExtractor.java:153)
at technology.tabula.ObjectExtractor.extractPage(ObjectExtractor.java:108)
at technology.tabula.PageIterator.next(PageIterator.java:29)
at technology.tabula.CommandLineApp.extractTables(CommandLineApp.java:142)
at technology.tabula.CommandLineApp.main(CommandLineApp.java:60)
Dec 11, 2016 4:11:32 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
WARNING: java.lang.ArrayIndexOutOfBoundsException: 5
java.lang.ArrayIndexOutOfBoundsException: 5
at java.awt.geom.Path2D$Float$TxIterator.currentSegment(Path2D.java:1029)
at technology.tabula.ObjectExtractor.strokeOrFillPath(ObjectExtractor.java:226)
at technology.tabula.ObjectExtractor.strokePath(ObjectExtractor.java:275)
at org.apache.pdfbox.util.operator.pagedrawer.StrokePath.process(StrokePath.java:47)
at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:557)
at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:532)
at org.apache.pdfbox.util.operator.pagedrawer.FillNonZeroAndStrokePath.process(FillNonZeroAndStrokePath.java:52)
at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:557)
at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:532)
at org.apache.pdfbox.util.operator.pagedrawer.CloseFillNonZeroAndStrokePath.process(CloseFillNonZeroAndStrokePath.java:47)
at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:557)
at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:268)
at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:235)
at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:215)
at technology.tabula.ObjectExtractor.drawPage(ObjectExtractor.java:153)
at technology.tabula.ObjectExtractor.extractPage(ObjectExtractor.java:108)
at technology.tabula.PageIterator.next(PageIterator.java:29)
at technology.tabula.CommandLineApp.extractTables(CommandLineApp.java:142)
at technology.tabula.CommandLineApp.main(CommandLineApp.java:60)
Dec 11, 2016 4:11:32 PM org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage mask
WARNING: Colour key masking isn't supported
Dec 11, 2016 4:11:33 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
WARNING: java.lang.ArrayIndexOutOfBoundsException
java.lang.ArrayIndexOutOfBoundsException
Dec 11, 2016 4:11:33 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
WARNING: java.lang.ArrayIndexOutOfBoundsException
java.lang.ArrayIndexOutOfBoundsException
Dec 11, 2016 4:11:33 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
WARNING: java.lang.ArrayIndexOutOfBoundsException
java.lang.ArrayIndexOutOfBoundsException