0

我尝试使用 tabula 命令行工具将 PDF 转换为 CSV,但运行下面的命令时报错:

java -jar ./tabula-0.9.1-jar-with-dependencies.jar -p all  -o data_table.csv file1.pdf

请问有人能帮忙看一下吗?错误输出如下:

Dec 11, 2016 4:11:31 PM org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage mask
WARNING: Colour key masking isn't supported
Dec 11, 2016 4:11:32 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
WARNING: java.lang.ArrayIndexOutOfBoundsException: 5
java.lang.ArrayIndexOutOfBoundsException: 5
    at java.awt.geom.Path2D$Float$TxIterator.currentSegment(Path2D.java:1029)
    at technology.tabula.ObjectExtractor.strokeOrFillPath(ObjectExtractor.java:226)
    at technology.tabula.ObjectExtractor.strokePath(ObjectExtractor.java:275)
    at org.apache.pdfbox.util.operator.pagedrawer.StrokePath.process(StrokePath.java:47)
    at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:557)
    at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:532)
    at org.apache.pdfbox.util.operator.pagedrawer.FillNonZeroAndStrokePath.process(FillNonZeroAndStrokePath.java:52)
    at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:557)
    at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:532)
    at org.apache.pdfbox.util.operator.pagedrawer.CloseFillNonZeroAndStrokePath.process(CloseFillNonZeroAndStrokePath.java:47)
    at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:557)
    at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:268)
    at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:235)
    at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:215)
    at technology.tabula.ObjectExtractor.drawPage(ObjectExtractor.java:153)
    at technology.tabula.ObjectExtractor.extractPage(ObjectExtractor.java:108)
    at technology.tabula.PageIterator.next(PageIterator.java:29)
    at technology.tabula.CommandLineApp.extractTables(CommandLineApp.java:142)
    at technology.tabula.CommandLineApp.main(CommandLineApp.java:60)

Dec 11, 2016 4:11:32 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
WARNING: java.lang.ArrayIndexOutOfBoundsException: 5
java.lang.ArrayIndexOutOfBoundsException: 5
    at java.awt.geom.Path2D$Float$TxIterator.currentSegment(Path2D.java:1029)
    at technology.tabula.ObjectExtractor.strokeOrFillPath(ObjectExtractor.java:226)
    at technology.tabula.ObjectExtractor.strokePath(ObjectExtractor.java:275)
    at org.apache.pdfbox.util.operator.pagedrawer.StrokePath.process(StrokePath.java:47)
    at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:557)
    at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:532)
    at org.apache.pdfbox.util.operator.pagedrawer.FillNonZeroAndStrokePath.process(FillNonZeroAndStrokePath.java:52)
    at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:557)
    at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:532)
    at org.apache.pdfbox.util.operator.pagedrawer.CloseFillNonZeroAndStrokePath.process(CloseFillNonZeroAndStrokePath.java:47)
    at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:557)
    at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:268)
    at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:235)
    at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:215)
    at technology.tabula.ObjectExtractor.drawPage(ObjectExtractor.java:153)
    at technology.tabula.ObjectExtractor.extractPage(ObjectExtractor.java:108)
    at technology.tabula.PageIterator.next(PageIterator.java:29)
    at technology.tabula.CommandLineApp.extractTables(CommandLineApp.java:142)
    at technology.tabula.CommandLineApp.main(CommandLineApp.java:60)

Dec 11, 2016 4:11:32 PM org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage mask
WARNING: Colour key masking isn't supported
Dec 11, 2016 4:11:33 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
WARNING: java.lang.ArrayIndexOutOfBoundsException
java.lang.ArrayIndexOutOfBoundsException

Dec 11, 2016 4:11:33 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
WARNING: java.lang.ArrayIndexOutOfBoundsException
java.lang.ArrayIndexOutOfBoundsException

Dec 11, 2016 4:11:33 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
WARNING: java.lang.ArrayIndexOutOfBoundsException
java.lang.ArrayIndexOutOfBoundsException
4

0 回答 0